diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..593f2ba684df00ce4efa25956ef2e580614a7b59 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1875/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b5f93b35a31b76a196c1bc096642d1348bfeb7ae --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +--- +library_name: peft +license: other +base_model: meta-llama/Llama-3.2-1B +tags: +- llama-factory +- lora +- generated_from_trainer +model-index: +- name: llama3.2-1b + results: [] +--- + + + +# llama3.2-1b + +This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on the mathinstruct dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0002 +- train_batch_size: 16 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.12.0 +- Transformers 4.46.1 +- Pytorch 2.4.1+cu124 +- Datasets 3.1.0 +- Tokenizers 0.20.3 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8dccbd00f8b654b41b87d53e00ede0c2d39d4943 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6ca1a286feb5f305d1377fb4b2b4ba22d9b9fc3346cd70c7a0b6f26c723ab9 +size 22573704 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9aecdedbe38ee7488418ffeffde0ced048359694 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 1.1764404625814323e+17, + "train_loss": 0.6465437274932861, + "train_runtime": 2365.2475, + "train_samples_per_second": 12.684, + "train_steps_per_second": 0.793 +} \ No newline at end of file diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7254c9a587970bc6dd97f4e636cbcc2707d1ffd --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ebfa8c8446a8383d729ac24bfa650392fd3ecd4c58d2efeff0073419e84fcb +size 22573704 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0174857f21c9816e4c3c5c9cd66c444515e6a58c --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bea1c039fc83872211ec8ea68aa68159022783f3ccb33efdc3d0469417df1b70 +size 45276986 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..60b5ff8f5384a684a269526e7c23e03a984fbb77 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeaf5f81794f7e3eff407ed147d07602da6499cdf39f79193d6603bdbaef56b0 +size 1064 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-100/tokenizer.json b/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e8e9bf9d78d729f9c3ed94741ba2a0f189d35251 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,173 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.16, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6310561256570880.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.safetensors b/checkpoint-1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac8872cd81a2df0d6790354915714d02c920d561 --- /dev/null +++ b/checkpoint-1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801dbf65e3cf855f18cf1685dd560a12a0a65a509250b38b2e2a4ae2a0e77ff3 +size 22573704 diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..58f043f851e3d6b4618384b5ead113ecd2a34bc5 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b5765022122d78d8449c5e2f67f8459c8fc4f1a037a31b98ef1047a77ab921 +size 45276986 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e0c21f26fbe9826bba53e91743146a6f18a4468 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aed3fde651f04e541c7e2da7b8b58e5b8fc9fcf1dacfc8254146682f84077c9 +size 1064 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1000/tokenizer.json b/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ec34e10aa8c78ece2c0047bcf0a6da02ef47841c --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.240126522831667e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1100/README.md b/checkpoint-1100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1100/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1100/adapter_config.json b/checkpoint-1100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1100/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1100/adapter_model.safetensors b/checkpoint-1100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18a0bd786dcf3fe3b7918b35e6c922d60d1604a2 --- /dev/null +++ b/checkpoint-1100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7abf739a40d37ed12434376746bcae636af9a92af6732ea3a553e7b927276e +size 22573704 diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d9d0968720d4d5723fc5b064e771bad09742d7a --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df773dc0557d5ee1d30af4e55c93d47161b20f473a41917585beceb57158f450 +size 45276986 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f04a32d1baf907c9157b76bef34cfa7539944284 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf616674b42264e305610180821eb0e8f03cfbba013a34bdcde2f8537d5f3c69 +size 1064 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1100/tokenizer.json b/checkpoint-1100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ff9a783c5895f232d29b35fc63ca00f618b680c5 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,1573 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.76, + "eval_steps": 500, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.87313708080169e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1200/README.md b/checkpoint-1200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1200/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1200/adapter_config.json b/checkpoint-1200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1200/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1200/adapter_model.safetensors b/checkpoint-1200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c5424ea4f404d1f587c659753f411ba6e884791 --- /dev/null +++ b/checkpoint-1200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf0cd1315fb42578fcd6e8894dbb7b7851f625319b7773d9e8c97703287791c +size 22573704 diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc281997201ac57468eac0315ef7fc9085adfa84 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b44bd95bf1c5cee85797d2b39182fe602837c2698bbf04ae60a4ff443d11813 +size 45276986 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..55eeeda0e29bcfad8be82da4ee94785a3f5563a5 --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce8836fce4037aad29e963cdec7ec8c16775fff070b0745d426b1929aeb3b03 +size 1064 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1200/tokenizer.json b/checkpoint-1200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1c0516de9b3540575661a92b8e1efc7c46c8ab65 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,1713 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.92, + "eval_steps": 500, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.518625937385062e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1300/README.md b/checkpoint-1300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1300/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1300/adapter_config.json b/checkpoint-1300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1300/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1300/adapter_model.safetensors b/checkpoint-1300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38b0b8b048ac8e4a9b3a6033d72aac96349e1c08 --- /dev/null +++ b/checkpoint-1300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b919733590839c4a71aea19cae3ae9f1ca80b08382c3a68f63fd4066635c5f +size 22573704 diff --git a/checkpoint-1300/optimizer.pt b/checkpoint-1300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dc3d0117cc8c51cf4c7aaec019ce6c770808ddc --- /dev/null +++ b/checkpoint-1300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f085efe076d51c87f26edef8d9014f5b7a9b01d62e5e96d0a8f826026de0f57 +size 45276986 diff --git a/checkpoint-1300/rng_state.pth b/checkpoint-1300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/checkpoint-1300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/checkpoint-1300/scheduler.pt b/checkpoint-1300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a2fb85da7968e8f3d7f3fa43e1b905068c3327e --- /dev/null +++ b/checkpoint-1300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc9f1578475de7fa30a036952f213791083f9d440f2f723c27ee6b3278e154f +size 1064 diff --git a/checkpoint-1300/special_tokens_map.json b/checkpoint-1300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1300/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1300/tokenizer.json b/checkpoint-1300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1300/tokenizer_config.json b/checkpoint-1300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1300/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1300/trainer_state.json b/checkpoint-1300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d2bb18d62a2a0956d2a44748fa9d558806a28340 --- /dev/null +++ b/checkpoint-1300/trainer_state.json @@ -0,0 +1,1853 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.08, + "eval_steps": 500, + "global_step": 1300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.139909901477478e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1300/training_args.bin b/checkpoint-1300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1400/README.md b/checkpoint-1400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1400/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1400/adapter_config.json b/checkpoint-1400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1400/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1400/adapter_model.safetensors b/checkpoint-1400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09ee73a00ccf5f999739c7257680bd7059e89957 --- /dev/null +++ b/checkpoint-1400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec089fc34ad501b8c0a8378fe4ca6660a53358dec7456aa53982d7f00d8b485 +size 22573704 diff --git a/checkpoint-1400/optimizer.pt b/checkpoint-1400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..856e66a742660c7c1b6447ff1be0ac1a8cb2e435 --- /dev/null +++ b/checkpoint-1400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0675dc87578168e92eca0ecde8e13b71f0bf6bdf4d0e1822fe7676291aca7b30 +size 45276986 diff --git a/checkpoint-1400/rng_state.pth b/checkpoint-1400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/checkpoint-1400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/checkpoint-1400/scheduler.pt b/checkpoint-1400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7f5fe8c664d2455e487cd7b7ae5713c678ee84c --- /dev/null +++ b/checkpoint-1400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965f92ef1c492826aab0266c7f7bca9df480dd00d8819c66c4840910e2350eea +size 1064 diff --git a/checkpoint-1400/special_tokens_map.json b/checkpoint-1400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1400/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1400/tokenizer.json b/checkpoint-1400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1400/tokenizer_config.json b/checkpoint-1400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1400/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1400/trainer_state.json b/checkpoint-1400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..da9eb074c7291c69643034eb32beb2315261f13b --- /dev/null +++ b/checkpoint-1400/trainer_state.json @@ -0,0 +1,1993 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.24, + "eval_steps": 500, + "global_step": 1400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + }, + { + "epoch": 2.088, + "grad_norm": 0.640073299407959, + "learning_rate": 4.224272965777326e-05, + "loss": 0.4813, + "step": 1305 + }, + { + "epoch": 2.096, + "grad_norm": 0.7172432541847229, + "learning_rate": 4.15608696880828e-05, + "loss": 0.5315, + "step": 1310 + }, + { + "epoch": 2.104, + "grad_norm": 1.1283674240112305, + "learning_rate": 4.08831111777658e-05, + "loss": 0.5591, + "step": 1315 + }, + { + "epoch": 2.112, + "grad_norm": 0.8184736967086792, + "learning_rate": 4.020950169424815e-05, + "loss": 0.605, + "step": 1320 + }, + { + "epoch": 2.12, + "grad_norm": 0.6823618412017822, + "learning_rate": 3.954008851376252e-05, + "loss": 0.4955, + "step": 1325 + }, + { + "epoch": 2.128, + "grad_norm": 0.8576385378837585, + "learning_rate": 3.887491861803085e-05, + "loss": 0.5757, + "step": 1330 + }, + { + "epoch": 2.136, + "grad_norm": 0.967835009098053, + "learning_rate": 3.821403869096658e-05, + "loss": 0.5313, + "step": 1335 + }, + { + "epoch": 2.144, + "grad_norm": 0.7330173254013062, + "learning_rate": 3.755749511539845e-05, + "loss": 0.5904, + "step": 1340 + }, + { + "epoch": 2.152, + "grad_norm": 0.6664792895317078, + "learning_rate": 3.690533396981504e-05, + "loss": 0.4679, + "step": 1345 + }, + { + "epoch": 2.16, + "grad_norm": 0.7639065980911255, + "learning_rate": 3.6257601025131026e-05, + "loss": 0.5235, + "step": 1350 + }, + { + "epoch": 2.168, + "grad_norm": 0.6960520148277283, + "learning_rate": 3.561434174147463e-05, + "loss": 0.5797, + "step": 1355 + }, + { + "epoch": 2.176, + "grad_norm": 0.8231356143951416, + "learning_rate": 3.497560126499709e-05, + "loss": 0.5772, + "step": 1360 + }, + { + "epoch": 2.184, + "grad_norm": 0.8968437910079956, + "learning_rate": 3.4341424424704375e-05, + "loss": 0.5316, + "step": 1365 + }, + { + "epoch": 2.192, + "grad_norm": 0.8037480711936951, + "learning_rate": 3.371185572931048e-05, + "loss": 0.5646, + "step": 1370 + }, + { + "epoch": 2.2, + "grad_norm": 0.9330148696899414, + "learning_rate": 3.308693936411421e-05, + "loss": 0.5431, + "step": 1375 + }, + { + "epoch": 2.208, + "grad_norm": 0.6958775520324707, + "learning_rate": 3.246671918789755e-05, + "loss": 0.5403, + "step": 1380 + }, + { + "epoch": 2.216, + "grad_norm": 0.9870476126670837, + "learning_rate": 3.1851238729848034e-05, + "loss": 0.5329, + "step": 1385 + }, + { + "epoch": 2.224, + "grad_norm": 0.5036590099334717, + "learning_rate": 3.124054118650327e-05, + "loss": 0.5696, + "step": 1390 + }, + { + "epoch": 2.232, + "grad_norm": 0.8640053868293762, + "learning_rate": 3.063466941871952e-05, + "loss": 0.59, + "step": 1395 + }, + { + "epoch": 2.24, + "grad_norm": 0.6065173149108887, + "learning_rate": 3.0033665948663448e-05, + "loss": 0.5116, + "step": 1400 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.74112334912553e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1400/training_args.bin b/checkpoint-1400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1500/README.md b/checkpoint-1500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1500/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1500/adapter_config.json b/checkpoint-1500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1500/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1500/adapter_model.safetensors b/checkpoint-1500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d58acfd6befadedd49a95b3ebdceaee9572b76d8 --- /dev/null +++ b/checkpoint-1500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a764b1b5a93f26da53bdde530ccb8d6167f1799dcf51951ca9dc12cd2fcd8f +size 22573704 diff --git a/checkpoint-1500/optimizer.pt b/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0320707e16c4c9fb2036393cdc86e0b73094ca01 --- /dev/null +++ b/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3abbbdfba8d1eb7b323e1e2860db19ff637ca11982a771a79720dbee2fa270 +size 45276986 diff --git a/checkpoint-1500/rng_state.pth b/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/checkpoint-1500/scheduler.pt b/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1b0a614624460fe0d30918ccff590ebd952a443 --- /dev/null +++ b/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afbb99b66193191c6fbfaa45808387495264b7d5ddd90b54fccbc0033a9c95b6 +size 1064 diff --git a/checkpoint-1500/special_tokens_map.json b/checkpoint-1500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1500/tokenizer.json b/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1500/tokenizer_config.json b/checkpoint-1500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1500/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..12e4b77dd7334ea238097c242e3ca6da6c69deaa --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,2133 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.4, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + }, + { + "epoch": 2.088, + "grad_norm": 0.640073299407959, + "learning_rate": 4.224272965777326e-05, + "loss": 0.4813, + "step": 1305 + }, + { + "epoch": 2.096, + "grad_norm": 0.7172432541847229, + "learning_rate": 4.15608696880828e-05, + "loss": 0.5315, + "step": 1310 + }, + { + "epoch": 2.104, + "grad_norm": 1.1283674240112305, + "learning_rate": 4.08831111777658e-05, + "loss": 0.5591, + "step": 1315 + }, + { + "epoch": 2.112, + "grad_norm": 0.8184736967086792, + "learning_rate": 4.020950169424815e-05, + "loss": 0.605, + "step": 1320 + }, + { + "epoch": 2.12, + "grad_norm": 0.6823618412017822, + "learning_rate": 3.954008851376252e-05, + "loss": 0.4955, + "step": 1325 + }, + { + "epoch": 2.128, + "grad_norm": 0.8576385378837585, + "learning_rate": 3.887491861803085e-05, + "loss": 0.5757, + "step": 1330 + }, + { + "epoch": 2.136, + "grad_norm": 0.967835009098053, + "learning_rate": 3.821403869096658e-05, + "loss": 0.5313, + "step": 1335 + }, + { + "epoch": 2.144, + "grad_norm": 0.7330173254013062, + "learning_rate": 3.755749511539845e-05, + "loss": 0.5904, + "step": 1340 + }, + { + "epoch": 2.152, + "grad_norm": 0.6664792895317078, + "learning_rate": 3.690533396981504e-05, + "loss": 0.4679, + "step": 1345 + }, + { + "epoch": 2.16, + "grad_norm": 0.7639065980911255, + "learning_rate": 3.6257601025131026e-05, + "loss": 0.5235, + "step": 1350 + }, + { + "epoch": 2.168, + "grad_norm": 0.6960520148277283, + "learning_rate": 3.561434174147463e-05, + "loss": 0.5797, + "step": 1355 + }, + { + "epoch": 2.176, + "grad_norm": 0.8231356143951416, + "learning_rate": 3.497560126499709e-05, + "loss": 0.5772, + "step": 1360 + }, + { + "epoch": 2.184, + "grad_norm": 0.8968437910079956, + "learning_rate": 3.4341424424704375e-05, + "loss": 0.5316, + "step": 1365 + }, + { + "epoch": 2.192, + "grad_norm": 0.8037480711936951, + "learning_rate": 3.371185572931048e-05, + "loss": 0.5646, + "step": 1370 + }, + { + "epoch": 2.2, + "grad_norm": 0.9330148696899414, + "learning_rate": 3.308693936411421e-05, + "loss": 0.5431, + "step": 1375 + }, + { + "epoch": 2.208, + "grad_norm": 0.6958775520324707, + "learning_rate": 3.246671918789755e-05, + "loss": 0.5403, + "step": 1380 + }, + { + "epoch": 2.216, + "grad_norm": 0.9870476126670837, + "learning_rate": 3.1851238729848034e-05, + "loss": 0.5329, + "step": 1385 + }, + { + "epoch": 2.224, + "grad_norm": 0.5036590099334717, + "learning_rate": 3.124054118650327e-05, + "loss": 0.5696, + "step": 1390 + }, + { + "epoch": 2.232, + "grad_norm": 0.8640053868293762, + "learning_rate": 3.063466941871952e-05, + "loss": 0.59, + "step": 1395 + }, + { + "epoch": 2.24, + "grad_norm": 0.6065173149108887, + "learning_rate": 3.0033665948663448e-05, + "loss": 0.5116, + "step": 1400 + }, + { + "epoch": 2.248, + "grad_norm": 1.083775520324707, + "learning_rate": 2.9437572956827964e-05, + "loss": 0.5783, + "step": 1405 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.7090497016906738, + "learning_rate": 2.8846432279071467e-05, + "loss": 0.6259, + "step": 1410 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.742468535900116, + "learning_rate": 2.826028540368215e-05, + "loss": 0.5759, + "step": 1415 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.9219839572906494, + "learning_rate": 2.7679173468465812e-05, + "loss": 0.497, + "step": 1420 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.7159206867218018, + "learning_rate": 2.7103137257858868e-05, + "loss": 0.619, + "step": 1425 + }, + { + "epoch": 2.288, + "grad_norm": 0.6997727751731873, + "learning_rate": 2.6532217200065858e-05, + "loss": 0.5858, + "step": 1430 + }, + { + "epoch": 2.296, + "grad_norm": 0.7493643164634705, + "learning_rate": 2.5966453364222186e-05, + "loss": 0.6291, + "step": 1435 + }, + { + "epoch": 2.304, + "grad_norm": 0.8311699032783508, + "learning_rate": 2.540588545758179e-05, + "loss": 0.6418, + "step": 1440 + }, + { + "epoch": 2.312, + "grad_norm": 0.7084354758262634, + "learning_rate": 2.48505528227304e-05, + "loss": 0.5483, + "step": 1445 + }, + { + "epoch": 2.32, + "grad_norm": 0.734438955783844, + "learning_rate": 2.4300494434824373e-05, + "loss": 0.6071, + "step": 1450 + }, + { + "epoch": 2.328, + "grad_norm": 0.8913635015487671, + "learning_rate": 2.37557488988552e-05, + "loss": 0.5099, + "step": 1455 + }, + { + "epoch": 2.336, + "grad_norm": 0.8349048495292664, + "learning_rate": 2.321635444694028e-05, + "loss": 0.5186, + "step": 1460 + }, + { + "epoch": 2.344, + "grad_norm": 0.6164011359214783, + "learning_rate": 2.2682348935639274e-05, + "loss": 0.5043, + "step": 1465 + }, + { + "epoch": 2.352, + "grad_norm": 1.044892430305481, + "learning_rate": 2.2153769843297667e-05, + "loss": 0.61, + "step": 1470 + }, + { + "epoch": 2.36, + "grad_norm": 0.9142879247665405, + "learning_rate": 2.163065426741603e-05, + "loss": 0.5987, + "step": 1475 + }, + { + "epoch": 2.368, + "grad_norm": 0.6232836842536926, + "learning_rate": 2.1113038922046602e-05, + "loss": 0.5212, + "step": 1480 + }, + { + "epoch": 2.376, + "grad_norm": 0.49558231234550476, + "learning_rate": 2.0600960135216462e-05, + "loss": 0.4796, + "step": 1485 + }, + { + "epoch": 2.384, + "grad_norm": 0.7887687683105469, + "learning_rate": 2.009445384637805e-05, + "loss": 0.4844, + "step": 1490 + }, + { + "epoch": 2.392, + "grad_norm": 0.8086990714073181, + "learning_rate": 1.9593555603886538e-05, + "loss": 0.5085, + "step": 1495 + }, + { + "epoch": 2.4, + "grad_norm": 0.6713303327560425, + "learning_rate": 1.9098300562505266e-05, + "loss": 0.4839, + "step": 1500 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.380974420190822e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1600/README.md b/checkpoint-1600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1600/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1600/adapter_config.json b/checkpoint-1600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1600/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1600/adapter_model.safetensors b/checkpoint-1600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abdf7f3146e8c6b8481994f15fd32b28dc33b373 --- /dev/null +++ b/checkpoint-1600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:337145b61a5001c8b09d6896c4e7f224022d6fa1fd18637ba924032c11327a41 +size 22573704 diff --git a/checkpoint-1600/optimizer.pt b/checkpoint-1600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..53acd7f31fb3a79d2e10bb085a2453169fbd3534 --- /dev/null +++ b/checkpoint-1600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caea1c237acd1c97587f3784f6b230128367e5a3b6cdea722de3d9b47ab66008 +size 45276986 diff --git a/checkpoint-1600/rng_state.pth b/checkpoint-1600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/checkpoint-1600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/checkpoint-1600/scheduler.pt b/checkpoint-1600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..55abad80d146bbe0e01861c5346970fd406d6e3a --- /dev/null +++ b/checkpoint-1600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ac804f61ed769803ac44a524dacfd9d31977b3b034955eebeef825bec16b3a +size 1064 diff --git a/checkpoint-1600/special_tokens_map.json b/checkpoint-1600/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1600/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1600/tokenizer.json b/checkpoint-1600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1600/tokenizer_config.json b/checkpoint-1600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1600/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1600/trainer_state.json b/checkpoint-1600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d85bcecfe762d5eb166535c2bd66ef3210212c81 --- /dev/null +++ b/checkpoint-1600/trainer_state.json @@ -0,0 +1,2273 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.56, + "eval_steps": 500, + "global_step": 1600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + }, + { + "epoch": 2.088, + "grad_norm": 0.640073299407959, + "learning_rate": 4.224272965777326e-05, + "loss": 0.4813, + "step": 1305 + }, + { + "epoch": 2.096, + "grad_norm": 0.7172432541847229, + "learning_rate": 4.15608696880828e-05, + "loss": 0.5315, + "step": 1310 + }, + { + "epoch": 2.104, + "grad_norm": 1.1283674240112305, + "learning_rate": 4.08831111777658e-05, + "loss": 0.5591, + "step": 1315 + }, + { + "epoch": 2.112, + "grad_norm": 0.8184736967086792, + "learning_rate": 4.020950169424815e-05, + "loss": 0.605, + "step": 1320 + }, + { + "epoch": 2.12, + "grad_norm": 0.6823618412017822, + "learning_rate": 3.954008851376252e-05, + "loss": 0.4955, + "step": 1325 + }, + { + "epoch": 2.128, + "grad_norm": 0.8576385378837585, + "learning_rate": 3.887491861803085e-05, + "loss": 0.5757, + "step": 1330 + }, + { + "epoch": 2.136, + "grad_norm": 0.967835009098053, + "learning_rate": 3.821403869096658e-05, + "loss": 0.5313, + "step": 1335 + }, + { + "epoch": 2.144, + "grad_norm": 0.7330173254013062, + "learning_rate": 3.755749511539845e-05, + "loss": 0.5904, + "step": 1340 + }, + { + "epoch": 2.152, + "grad_norm": 0.6664792895317078, + "learning_rate": 3.690533396981504e-05, + "loss": 0.4679, + "step": 1345 + }, + { + "epoch": 2.16, + "grad_norm": 0.7639065980911255, + "learning_rate": 3.6257601025131026e-05, + "loss": 0.5235, + "step": 1350 + }, + { + "epoch": 2.168, + "grad_norm": 0.6960520148277283, + "learning_rate": 3.561434174147463e-05, + "loss": 0.5797, + "step": 1355 + }, + { + "epoch": 2.176, + "grad_norm": 0.8231356143951416, + "learning_rate": 3.497560126499709e-05, + "loss": 0.5772, + "step": 1360 + }, + { + "epoch": 2.184, + "grad_norm": 0.8968437910079956, + "learning_rate": 3.4341424424704375e-05, + "loss": 0.5316, + "step": 1365 + }, + { + "epoch": 2.192, + "grad_norm": 0.8037480711936951, + "learning_rate": 3.371185572931048e-05, + "loss": 0.5646, + "step": 1370 + }, + { + "epoch": 2.2, + "grad_norm": 0.9330148696899414, + "learning_rate": 3.308693936411421e-05, + "loss": 0.5431, + "step": 1375 + }, + { + "epoch": 2.208, + "grad_norm": 0.6958775520324707, + "learning_rate": 3.246671918789755e-05, + "loss": 0.5403, + "step": 1380 + }, + { + "epoch": 2.216, + "grad_norm": 0.9870476126670837, + "learning_rate": 3.1851238729848034e-05, + "loss": 0.5329, + "step": 1385 + }, + { + "epoch": 2.224, + "grad_norm": 0.5036590099334717, + "learning_rate": 3.124054118650327e-05, + "loss": 0.5696, + "step": 1390 + }, + { + "epoch": 2.232, + "grad_norm": 0.8640053868293762, + "learning_rate": 3.063466941871952e-05, + "loss": 0.59, + "step": 1395 + }, + { + "epoch": 2.24, + "grad_norm": 0.6065173149108887, + "learning_rate": 3.0033665948663448e-05, + "loss": 0.5116, + "step": 1400 + }, + { + "epoch": 2.248, + "grad_norm": 1.083775520324707, + "learning_rate": 2.9437572956827964e-05, + "loss": 0.5783, + "step": 1405 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.7090497016906738, + "learning_rate": 2.8846432279071467e-05, + "loss": 0.6259, + "step": 1410 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.742468535900116, + "learning_rate": 2.826028540368215e-05, + "loss": 0.5759, + "step": 1415 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.9219839572906494, + "learning_rate": 2.7679173468465812e-05, + "loss": 0.497, + "step": 1420 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.7159206867218018, + "learning_rate": 2.7103137257858868e-05, + "loss": 0.619, + "step": 1425 + }, + { + "epoch": 2.288, + "grad_norm": 0.6997727751731873, + "learning_rate": 2.6532217200065858e-05, + "loss": 0.5858, + "step": 1430 + }, + { + "epoch": 2.296, + "grad_norm": 0.7493643164634705, + "learning_rate": 2.5966453364222186e-05, + "loss": 0.6291, + "step": 1435 + }, + { + "epoch": 2.304, + "grad_norm": 0.8311699032783508, + "learning_rate": 2.540588545758179e-05, + "loss": 0.6418, + "step": 1440 + }, + { + "epoch": 2.312, + "grad_norm": 0.7084354758262634, + "learning_rate": 2.48505528227304e-05, + "loss": 0.5483, + "step": 1445 + }, + { + "epoch": 2.32, + "grad_norm": 0.734438955783844, + "learning_rate": 2.4300494434824373e-05, + "loss": 0.6071, + "step": 1450 + }, + { + "epoch": 2.328, + "grad_norm": 0.8913635015487671, + "learning_rate": 2.37557488988552e-05, + "loss": 0.5099, + "step": 1455 + }, + { + "epoch": 2.336, + "grad_norm": 0.8349048495292664, + "learning_rate": 2.321635444694028e-05, + "loss": 0.5186, + "step": 1460 + }, + { + "epoch": 2.344, + "grad_norm": 0.6164011359214783, + "learning_rate": 2.2682348935639274e-05, + "loss": 0.5043, + "step": 1465 + }, + { + "epoch": 2.352, + "grad_norm": 1.044892430305481, + "learning_rate": 2.2153769843297667e-05, + "loss": 0.61, + "step": 1470 + }, + { + "epoch": 2.36, + "grad_norm": 0.9142879247665405, + "learning_rate": 2.163065426741603e-05, + "loss": 0.5987, + "step": 1475 + }, + { + "epoch": 2.368, + "grad_norm": 0.6232836842536926, + "learning_rate": 2.1113038922046602e-05, + "loss": 0.5212, + "step": 1480 + }, + { + "epoch": 2.376, + "grad_norm": 0.49558231234550476, + "learning_rate": 2.0600960135216462e-05, + "loss": 0.4796, + "step": 1485 + }, + { + "epoch": 2.384, + "grad_norm": 0.7887687683105469, + "learning_rate": 2.009445384637805e-05, + "loss": 0.4844, + "step": 1490 + }, + { + "epoch": 2.392, + "grad_norm": 0.8086990714073181, + "learning_rate": 1.9593555603886538e-05, + "loss": 0.5085, + "step": 1495 + }, + { + "epoch": 2.4, + "grad_norm": 0.6713303327560425, + "learning_rate": 1.9098300562505266e-05, + "loss": 0.4839, + "step": 1500 + }, + { + "epoch": 2.408, + "grad_norm": 0.6262741684913635, + "learning_rate": 1.8608723480938206e-05, + "loss": 0.5715, + "step": 1505 + }, + { + "epoch": 2.416, + "grad_norm": 0.8025808334350586, + "learning_rate": 1.812485871939056e-05, + "loss": 0.5266, + "step": 1510 + }, + { + "epoch": 2.424, + "grad_norm": 0.8753231167793274, + "learning_rate": 1.7646740237157256e-05, + "loss": 0.5422, + "step": 1515 + }, + { + "epoch": 2.432, + "grad_norm": 0.6459301710128784, + "learning_rate": 1.7174401590239587e-05, + "loss": 0.5553, + "step": 1520 + }, + { + "epoch": 2.44, + "grad_norm": 0.6917416453361511, + "learning_rate": 1.6707875928990058e-05, + "loss": 0.5765, + "step": 1525 + }, + { + "epoch": 2.448, + "grad_norm": 0.7890029549598694, + "learning_rate": 1.6247195995785837e-05, + "loss": 0.549, + "step": 1530 + }, + { + "epoch": 2.456, + "grad_norm": 0.9913660883903503, + "learning_rate": 1.579239412273078e-05, + "loss": 0.4876, + "step": 1535 + }, + { + "epoch": 2.464, + "grad_norm": 0.9030985832214355, + "learning_rate": 1.5343502229386207e-05, + "loss": 0.5546, + "step": 1540 + }, + { + "epoch": 2.472, + "grad_norm": 0.9133403301239014, + "learning_rate": 1.4900551820530828e-05, + "loss": 0.5356, + "step": 1545 + }, + { + "epoch": 2.48, + "grad_norm": 0.7083793878555298, + "learning_rate": 1.4463573983949341e-05, + "loss": 0.5142, + "step": 1550 + }, + { + "epoch": 2.488, + "grad_norm": 1.095435619354248, + "learning_rate": 1.40325993882509e-05, + "loss": 0.6054, + "step": 1555 + }, + { + "epoch": 2.496, + "grad_norm": 0.8825190663337708, + "learning_rate": 1.3607658280716473e-05, + "loss": 0.5294, + "step": 1560 + }, + { + "epoch": 2.504, + "grad_norm": 0.9436343908309937, + "learning_rate": 1.3188780485176088e-05, + "loss": 0.5294, + "step": 1565 + }, + { + "epoch": 2.512, + "grad_norm": 1.0125439167022705, + "learning_rate": 1.2775995399915631e-05, + "loss": 0.4905, + "step": 1570 + }, + { + "epoch": 2.52, + "grad_norm": 0.8476350903511047, + "learning_rate": 1.2369331995613665e-05, + "loss": 0.5186, + "step": 1575 + }, + { + "epoch": 2.528, + "grad_norm": 0.9092681407928467, + "learning_rate": 1.196881881330798e-05, + "loss": 0.4909, + "step": 1580 + }, + { + "epoch": 2.536, + "grad_norm": 0.7970360517501831, + "learning_rate": 1.1574483962392767e-05, + "loss": 0.5303, + "step": 1585 + }, + { + "epoch": 2.544, + "grad_norm": 0.8575041890144348, + "learning_rate": 1.1186355118645554e-05, + "loss": 0.5169, + "step": 1590 + }, + { + "epoch": 2.552, + "grad_norm": 0.7397408485412598, + "learning_rate": 1.0804459522284926e-05, + "loss": 0.5339, + "step": 1595 + }, + { + "epoch": 2.56, + "grad_norm": 0.7415968179702759, + "learning_rate": 1.042882397605871e-05, + "loss": 0.5283, + "step": 1600 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.97940656031662e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1600/training_args.bin b/checkpoint-1600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1700/README.md b/checkpoint-1700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1700/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1700/adapter_config.json b/checkpoint-1700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1700/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1700/adapter_model.safetensors b/checkpoint-1700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d055100aa17299c05a0077273e9be7f1393e1e35 --- /dev/null +++ b/checkpoint-1700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270ec35d767b39a3b93e3419f81244f050a2770e9d1c9736aefcb0d7f50e0c3a +size 22573704 diff --git a/checkpoint-1700/optimizer.pt b/checkpoint-1700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0c51497804067168da2cc30ebb8bd4f7aff01ad --- /dev/null +++ b/checkpoint-1700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3868031bb03724af6642cdb7cac87eca1b9fbb14c5c57acb56307bb79465c50d +size 45276986 diff --git a/checkpoint-1700/rng_state.pth b/checkpoint-1700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/checkpoint-1700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/checkpoint-1700/scheduler.pt b/checkpoint-1700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8624eebc36c0010107fd6b864b21b7c1865182a --- /dev/null +++ b/checkpoint-1700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3eec6f9e7ec8904e865b68e83b407d99aebb49c71b712ac6c8770848a2d2ed +size 1064 diff --git a/checkpoint-1700/special_tokens_map.json b/checkpoint-1700/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1700/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1700/tokenizer.json b/checkpoint-1700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1700/tokenizer_config.json b/checkpoint-1700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1700/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1700/trainer_state.json b/checkpoint-1700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..62643c7719b60a79838e4353842b8c7280142f17 --- /dev/null +++ b/checkpoint-1700/trainer_state.json @@ -0,0 +1,2413 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.7199999999999998, + "eval_steps": 500, + "global_step": 1700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + }, + { + "epoch": 2.088, + "grad_norm": 0.640073299407959, + "learning_rate": 4.224272965777326e-05, + "loss": 0.4813, + "step": 1305 + }, + { + "epoch": 2.096, + "grad_norm": 0.7172432541847229, + "learning_rate": 4.15608696880828e-05, + "loss": 0.5315, + "step": 1310 + }, + { + "epoch": 2.104, + "grad_norm": 1.1283674240112305, + "learning_rate": 4.08831111777658e-05, + "loss": 0.5591, + "step": 1315 + }, + { + "epoch": 2.112, + "grad_norm": 0.8184736967086792, + "learning_rate": 4.020950169424815e-05, + "loss": 0.605, + "step": 1320 + }, + { + "epoch": 2.12, + "grad_norm": 0.6823618412017822, + "learning_rate": 3.954008851376252e-05, + "loss": 0.4955, + "step": 1325 + }, + { + "epoch": 2.128, + "grad_norm": 0.8576385378837585, + "learning_rate": 3.887491861803085e-05, + "loss": 0.5757, + "step": 1330 + }, + { + "epoch": 2.136, + "grad_norm": 0.967835009098053, + "learning_rate": 3.821403869096658e-05, + "loss": 0.5313, + "step": 1335 + }, + { + "epoch": 2.144, + "grad_norm": 0.7330173254013062, + "learning_rate": 3.755749511539845e-05, + "loss": 0.5904, + "step": 1340 + }, + { + "epoch": 2.152, + "grad_norm": 0.6664792895317078, + "learning_rate": 3.690533396981504e-05, + "loss": 0.4679, + "step": 1345 + }, + { + "epoch": 2.16, + "grad_norm": 0.7639065980911255, + "learning_rate": 3.6257601025131026e-05, + "loss": 0.5235, + "step": 1350 + }, + { + "epoch": 2.168, + "grad_norm": 0.6960520148277283, + "learning_rate": 3.561434174147463e-05, + "loss": 0.5797, + "step": 1355 + }, + { + "epoch": 2.176, + "grad_norm": 0.8231356143951416, + "learning_rate": 3.497560126499709e-05, + "loss": 0.5772, + "step": 1360 + }, + { + "epoch": 2.184, + "grad_norm": 0.8968437910079956, + "learning_rate": 3.4341424424704375e-05, + "loss": 0.5316, + "step": 1365 + }, + { + "epoch": 2.192, + "grad_norm": 0.8037480711936951, + "learning_rate": 3.371185572931048e-05, + "loss": 0.5646, + "step": 1370 + }, + { + "epoch": 2.2, + "grad_norm": 0.9330148696899414, + "learning_rate": 3.308693936411421e-05, + "loss": 0.5431, + "step": 1375 + }, + { + "epoch": 2.208, + "grad_norm": 0.6958775520324707, + "learning_rate": 3.246671918789755e-05, + "loss": 0.5403, + "step": 1380 + }, + { + "epoch": 2.216, + "grad_norm": 0.9870476126670837, + "learning_rate": 3.1851238729848034e-05, + "loss": 0.5329, + "step": 1385 + }, + { + "epoch": 2.224, + "grad_norm": 0.5036590099334717, + "learning_rate": 3.124054118650327e-05, + "loss": 0.5696, + "step": 1390 + }, + { + "epoch": 2.232, + "grad_norm": 0.8640053868293762, + "learning_rate": 3.063466941871952e-05, + "loss": 0.59, + "step": 1395 + }, + { + "epoch": 2.24, + "grad_norm": 0.6065173149108887, + "learning_rate": 3.0033665948663448e-05, + "loss": 0.5116, + "step": 1400 + }, + { + "epoch": 2.248, + "grad_norm": 1.083775520324707, + "learning_rate": 2.9437572956827964e-05, + "loss": 0.5783, + "step": 1405 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.7090497016906738, + "learning_rate": 2.8846432279071467e-05, + "loss": 0.6259, + "step": 1410 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.742468535900116, + "learning_rate": 2.826028540368215e-05, + "loss": 0.5759, + "step": 1415 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.9219839572906494, + "learning_rate": 2.7679173468465812e-05, + "loss": 0.497, + "step": 1420 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.7159206867218018, + "learning_rate": 2.7103137257858868e-05, + "loss": 0.619, + "step": 1425 + }, + { + "epoch": 2.288, + "grad_norm": 0.6997727751731873, + "learning_rate": 2.6532217200065858e-05, + "loss": 0.5858, + "step": 1430 + }, + { + "epoch": 2.296, + "grad_norm": 0.7493643164634705, + "learning_rate": 2.5966453364222186e-05, + "loss": 0.6291, + "step": 1435 + }, + { + "epoch": 2.304, + "grad_norm": 0.8311699032783508, + "learning_rate": 2.540588545758179e-05, + "loss": 0.6418, + "step": 1440 + }, + { + "epoch": 2.312, + "grad_norm": 0.7084354758262634, + "learning_rate": 2.48505528227304e-05, + "loss": 0.5483, + "step": 1445 + }, + { + "epoch": 2.32, + "grad_norm": 0.734438955783844, + "learning_rate": 2.4300494434824373e-05, + "loss": 0.6071, + "step": 1450 + }, + { + "epoch": 2.328, + "grad_norm": 0.8913635015487671, + "learning_rate": 2.37557488988552e-05, + "loss": 0.5099, + "step": 1455 + }, + { + "epoch": 2.336, + "grad_norm": 0.8349048495292664, + "learning_rate": 2.321635444694028e-05, + "loss": 0.5186, + "step": 1460 + }, + { + "epoch": 2.344, + "grad_norm": 0.6164011359214783, + "learning_rate": 2.2682348935639274e-05, + "loss": 0.5043, + "step": 1465 + }, + { + "epoch": 2.352, + "grad_norm": 1.044892430305481, + "learning_rate": 2.2153769843297667e-05, + "loss": 0.61, + "step": 1470 + }, + { + "epoch": 2.36, + "grad_norm": 0.9142879247665405, + "learning_rate": 2.163065426741603e-05, + "loss": 0.5987, + "step": 1475 + }, + { + "epoch": 2.368, + "grad_norm": 0.6232836842536926, + "learning_rate": 2.1113038922046602e-05, + "loss": 0.5212, + "step": 1480 + }, + { + "epoch": 2.376, + "grad_norm": 0.49558231234550476, + "learning_rate": 2.0600960135216462e-05, + "loss": 0.4796, + "step": 1485 + }, + { + "epoch": 2.384, + "grad_norm": 0.7887687683105469, + "learning_rate": 2.009445384637805e-05, + "loss": 0.4844, + "step": 1490 + }, + { + "epoch": 2.392, + "grad_norm": 0.8086990714073181, + "learning_rate": 1.9593555603886538e-05, + "loss": 0.5085, + "step": 1495 + }, + { + "epoch": 2.4, + "grad_norm": 0.6713303327560425, + "learning_rate": 1.9098300562505266e-05, + "loss": 0.4839, + "step": 1500 + }, + { + "epoch": 2.408, + "grad_norm": 0.6262741684913635, + "learning_rate": 1.8608723480938206e-05, + "loss": 0.5715, + "step": 1505 + }, + { + "epoch": 2.416, + "grad_norm": 0.8025808334350586, + "learning_rate": 1.812485871939056e-05, + "loss": 0.5266, + "step": 1510 + }, + { + "epoch": 2.424, + "grad_norm": 0.8753231167793274, + "learning_rate": 1.7646740237157256e-05, + "loss": 0.5422, + "step": 1515 + }, + { + "epoch": 2.432, + "grad_norm": 0.6459301710128784, + "learning_rate": 1.7174401590239587e-05, + "loss": 0.5553, + "step": 1520 + }, + { + "epoch": 2.44, + "grad_norm": 0.6917416453361511, + "learning_rate": 1.6707875928990058e-05, + "loss": 0.5765, + "step": 1525 + }, + { + "epoch": 2.448, + "grad_norm": 0.7890029549598694, + "learning_rate": 1.6247195995785837e-05, + "loss": 0.549, + "step": 1530 + }, + { + "epoch": 2.456, + "grad_norm": 0.9913660883903503, + "learning_rate": 1.579239412273078e-05, + "loss": 0.4876, + "step": 1535 + }, + { + "epoch": 2.464, + "grad_norm": 0.9030985832214355, + "learning_rate": 1.5343502229386207e-05, + "loss": 0.5546, + "step": 1540 + }, + { + "epoch": 2.472, + "grad_norm": 0.9133403301239014, + "learning_rate": 1.4900551820530828e-05, + "loss": 0.5356, + "step": 1545 + }, + { + "epoch": 2.48, + "grad_norm": 0.7083793878555298, + "learning_rate": 1.4463573983949341e-05, + "loss": 0.5142, + "step": 1550 + }, + { + "epoch": 2.488, + "grad_norm": 1.095435619354248, + "learning_rate": 1.40325993882509e-05, + "loss": 0.6054, + "step": 1555 + }, + { + "epoch": 2.496, + "grad_norm": 0.8825190663337708, + "learning_rate": 1.3607658280716473e-05, + "loss": 0.5294, + "step": 1560 + }, + { + "epoch": 2.504, + "grad_norm": 0.9436343908309937, + "learning_rate": 1.3188780485176088e-05, + "loss": 0.5294, + "step": 1565 + }, + { + "epoch": 2.512, + "grad_norm": 1.0125439167022705, + "learning_rate": 1.2775995399915631e-05, + "loss": 0.4905, + "step": 1570 + }, + { + "epoch": 2.52, + "grad_norm": 0.8476350903511047, + "learning_rate": 1.2369331995613665e-05, + "loss": 0.5186, + "step": 1575 + }, + { + "epoch": 2.528, + "grad_norm": 0.9092681407928467, + "learning_rate": 1.196881881330798e-05, + "loss": 0.4909, + "step": 1580 + }, + { + "epoch": 2.536, + "grad_norm": 0.7970360517501831, + "learning_rate": 1.1574483962392767e-05, + "loss": 0.5303, + "step": 1585 + }, + { + "epoch": 2.544, + "grad_norm": 0.8575041890144348, + "learning_rate": 1.1186355118645554e-05, + "loss": 0.5169, + "step": 1590 + }, + { + "epoch": 2.552, + "grad_norm": 0.7397408485412598, + "learning_rate": 1.0804459522284926e-05, + "loss": 0.5339, + "step": 1595 + }, + { + "epoch": 2.56, + "grad_norm": 0.7415968179702759, + "learning_rate": 1.042882397605871e-05, + "loss": 0.5283, + "step": 1600 + }, + { + "epoch": 2.568, + "grad_norm": 0.7035180926322937, + "learning_rate": 1.0059474843362892e-05, + "loss": 0.5576, + "step": 1605 + }, + { + "epoch": 2.576, + "grad_norm": 0.9805112481117249, + "learning_rate": 9.696438046391288e-06, + "loss": 0.5136, + "step": 1610 + }, + { + "epoch": 2.584, + "grad_norm": 0.6661838889122009, + "learning_rate": 9.339739064316233e-06, + "loss": 0.5885, + "step": 1615 + }, + { + "epoch": 2.592, + "grad_norm": 0.8581559062004089, + "learning_rate": 8.989402931500434e-06, + "loss": 0.5, + "step": 1620 + }, + { + "epoch": 2.6, + "grad_norm": 0.7146279811859131, + "learning_rate": 8.645454235739903e-06, + "loss": 0.5325, + "step": 1625 + }, + { + "epoch": 2.608, + "grad_norm": 0.9474234580993652, + "learning_rate": 8.307917116538378e-06, + "loss": 0.5772, + "step": 1630 + }, + { + "epoch": 2.616, + "grad_norm": 0.9583209753036499, + "learning_rate": 7.976815263412963e-06, + "loss": 0.5736, + "step": 1635 + }, + { + "epoch": 2.624, + "grad_norm": 0.7156705260276794, + "learning_rate": 7.652171914231776e-06, + "loss": 0.5199, + "step": 1640 + }, + { + "epoch": 2.632, + "grad_norm": 0.8224849700927734, + "learning_rate": 7.3340098535827905e-06, + "loss": 0.5753, + "step": 1645 + }, + { + "epoch": 2.64, + "grad_norm": 0.8689257502555847, + "learning_rate": 7.022351411174866e-06, + "loss": 0.5424, + "step": 1650 + }, + { + "epoch": 2.648, + "grad_norm": 0.6636053323745728, + "learning_rate": 6.717218460270536e-06, + "loss": 0.5555, + "step": 1655 + }, + { + "epoch": 2.656, + "grad_norm": 0.8688860535621643, + "learning_rate": 6.418632416150927e-06, + "loss": 0.4936, + "step": 1660 + }, + { + "epoch": 2.664, + "grad_norm": 0.6272854208946228, + "learning_rate": 6.126614234612593e-06, + "loss": 0.6291, + "step": 1665 + }, + { + "epoch": 2.672, + "grad_norm": 1.2240337133407593, + "learning_rate": 5.8411844104969916e-06, + "loss": 0.5197, + "step": 1670 + }, + { + "epoch": 2.68, + "grad_norm": 0.9820936918258667, + "learning_rate": 5.562362976251901e-06, + "loss": 0.5398, + "step": 1675 + }, + { + "epoch": 2.6879999999999997, + "grad_norm": 1.1582359075546265, + "learning_rate": 5.290169500525577e-06, + "loss": 0.6059, + "step": 1680 + }, + { + "epoch": 2.6959999999999997, + "grad_norm": 0.5501114726066589, + "learning_rate": 5.024623086793323e-06, + "loss": 0.531, + "step": 1685 + }, + { + "epoch": 2.7039999999999997, + "grad_norm": 0.8848717212677002, + "learning_rate": 4.765742372016735e-06, + "loss": 0.6054, + "step": 1690 + }, + { + "epoch": 2.7119999999999997, + "grad_norm": 0.7358693480491638, + "learning_rate": 4.513545525335705e-06, + "loss": 0.5173, + "step": 1695 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 0.9218215942382812, + "learning_rate": 4.268050246793276e-06, + "loss": 0.4944, + "step": 1700 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0611966095445197e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1700/training_args.bin b/checkpoint-1700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1800/README.md b/checkpoint-1800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1800/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1800/adapter_config.json b/checkpoint-1800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1800/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1800/adapter_model.safetensors b/checkpoint-1800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b4aecf3346bcecbacc7e12e4fe42fa76d1dd530 --- /dev/null +++ b/checkpoint-1800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0aa6caaaa835f73a08a0f38672aff5ecdffd345851e6ee438b1aae475f7fb4 +size 22573704 diff --git a/checkpoint-1800/optimizer.pt b/checkpoint-1800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b835c159985636ae511870047ef1df567a1856ea --- /dev/null +++ b/checkpoint-1800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad02be800eb66fb7021bc5f65353218dc64b0a30d138c00e3f59085a7f0057dc +size 45276986 diff --git a/checkpoint-1800/rng_state.pth b/checkpoint-1800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/checkpoint-1800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/checkpoint-1800/scheduler.pt b/checkpoint-1800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7700a21f899d862b51aa9922a23a28f63fc9f25f --- /dev/null +++ b/checkpoint-1800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5dc0a991543597746accf379f280ebfcdf588edf4bc55d548208c58399ab795 +size 1064 diff --git a/checkpoint-1800/special_tokens_map.json b/checkpoint-1800/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1800/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1800/tokenizer.json b/checkpoint-1800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1800/tokenizer_config.json b/checkpoint-1800/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1800/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1800/trainer_state.json b/checkpoint-1800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..efe3e34287a39756ec5f51363aa42c9980d43882 --- /dev/null +++ b/checkpoint-1800/trainer_state.json @@ -0,0 +1,2553 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.88, + "eval_steps": 500, + "global_step": 1800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + }, + { + "epoch": 2.088, + "grad_norm": 0.640073299407959, + "learning_rate": 4.224272965777326e-05, + "loss": 0.4813, + "step": 1305 + }, + { + "epoch": 2.096, + "grad_norm": 0.7172432541847229, + "learning_rate": 4.15608696880828e-05, + "loss": 0.5315, + "step": 1310 + }, + { + "epoch": 2.104, + "grad_norm": 1.1283674240112305, + "learning_rate": 4.08831111777658e-05, + "loss": 0.5591, + "step": 1315 + }, + { + "epoch": 2.112, + "grad_norm": 0.8184736967086792, + "learning_rate": 4.020950169424815e-05, + "loss": 0.605, + "step": 1320 + }, + { + "epoch": 2.12, + "grad_norm": 0.6823618412017822, + "learning_rate": 3.954008851376252e-05, + "loss": 0.4955, + "step": 1325 + }, + { + "epoch": 2.128, + "grad_norm": 0.8576385378837585, + "learning_rate": 3.887491861803085e-05, + "loss": 0.5757, + "step": 1330 + }, + { + "epoch": 2.136, + "grad_norm": 0.967835009098053, + "learning_rate": 3.821403869096658e-05, + "loss": 0.5313, + "step": 1335 + }, + { + "epoch": 2.144, + "grad_norm": 0.7330173254013062, + "learning_rate": 3.755749511539845e-05, + "loss": 0.5904, + "step": 1340 + }, + { + "epoch": 2.152, + "grad_norm": 0.6664792895317078, + "learning_rate": 3.690533396981504e-05, + "loss": 0.4679, + "step": 1345 + }, + { + "epoch": 2.16, + "grad_norm": 0.7639065980911255, + "learning_rate": 3.6257601025131026e-05, + "loss": 0.5235, + "step": 1350 + }, + { + "epoch": 2.168, + "grad_norm": 0.6960520148277283, + "learning_rate": 3.561434174147463e-05, + "loss": 0.5797, + "step": 1355 + }, + { + "epoch": 2.176, + "grad_norm": 0.8231356143951416, + "learning_rate": 3.497560126499709e-05, + "loss": 0.5772, + "step": 1360 + }, + { + "epoch": 2.184, + "grad_norm": 0.8968437910079956, + "learning_rate": 3.4341424424704375e-05, + "loss": 0.5316, + "step": 1365 + }, + { + "epoch": 2.192, + "grad_norm": 0.8037480711936951, + "learning_rate": 3.371185572931048e-05, + "loss": 0.5646, + "step": 1370 + }, + { + "epoch": 2.2, + "grad_norm": 0.9330148696899414, + "learning_rate": 3.308693936411421e-05, + "loss": 0.5431, + "step": 1375 + }, + { + "epoch": 2.208, + "grad_norm": 0.6958775520324707, + "learning_rate": 3.246671918789755e-05, + "loss": 0.5403, + "step": 1380 + }, + { + "epoch": 2.216, + "grad_norm": 0.9870476126670837, + "learning_rate": 3.1851238729848034e-05, + "loss": 0.5329, + "step": 1385 + }, + { + "epoch": 2.224, + "grad_norm": 0.5036590099334717, + "learning_rate": 3.124054118650327e-05, + "loss": 0.5696, + "step": 1390 + }, + { + "epoch": 2.232, + "grad_norm": 0.8640053868293762, + "learning_rate": 3.063466941871952e-05, + "loss": 0.59, + "step": 1395 + }, + { + "epoch": 2.24, + "grad_norm": 0.6065173149108887, + "learning_rate": 3.0033665948663448e-05, + "loss": 0.5116, + "step": 1400 + }, + { + "epoch": 2.248, + "grad_norm": 1.083775520324707, + "learning_rate": 2.9437572956827964e-05, + "loss": 0.5783, + "step": 1405 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.7090497016906738, + "learning_rate": 2.8846432279071467e-05, + "loss": 0.6259, + "step": 1410 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.742468535900116, + "learning_rate": 2.826028540368215e-05, + "loss": 0.5759, + "step": 1415 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.9219839572906494, + "learning_rate": 2.7679173468465812e-05, + "loss": 0.497, + "step": 1420 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.7159206867218018, + "learning_rate": 2.7103137257858868e-05, + "loss": 0.619, + "step": 1425 + }, + { + "epoch": 2.288, + "grad_norm": 0.6997727751731873, + "learning_rate": 2.6532217200065858e-05, + "loss": 0.5858, + "step": 1430 + }, + { + "epoch": 2.296, + "grad_norm": 0.7493643164634705, + "learning_rate": 2.5966453364222186e-05, + "loss": 0.6291, + "step": 1435 + }, + { + "epoch": 2.304, + "grad_norm": 0.8311699032783508, + "learning_rate": 2.540588545758179e-05, + "loss": 0.6418, + "step": 1440 + }, + { + "epoch": 2.312, + "grad_norm": 0.7084354758262634, + "learning_rate": 2.48505528227304e-05, + "loss": 0.5483, + "step": 1445 + }, + { + "epoch": 2.32, + "grad_norm": 0.734438955783844, + "learning_rate": 2.4300494434824373e-05, + "loss": 0.6071, + "step": 1450 + }, + { + "epoch": 2.328, + "grad_norm": 0.8913635015487671, + "learning_rate": 2.37557488988552e-05, + "loss": 0.5099, + "step": 1455 + }, + { + "epoch": 2.336, + "grad_norm": 0.8349048495292664, + "learning_rate": 2.321635444694028e-05, + "loss": 0.5186, + "step": 1460 + }, + { + "epoch": 2.344, + "grad_norm": 0.6164011359214783, + "learning_rate": 2.2682348935639274e-05, + "loss": 0.5043, + "step": 1465 + }, + { + "epoch": 2.352, + "grad_norm": 1.044892430305481, + "learning_rate": 2.2153769843297667e-05, + "loss": 0.61, + "step": 1470 + }, + { + "epoch": 2.36, + "grad_norm": 0.9142879247665405, + "learning_rate": 2.163065426741603e-05, + "loss": 0.5987, + "step": 1475 + }, + { + "epoch": 2.368, + "grad_norm": 0.6232836842536926, + "learning_rate": 2.1113038922046602e-05, + "loss": 0.5212, + "step": 1480 + }, + { + "epoch": 2.376, + "grad_norm": 0.49558231234550476, + "learning_rate": 2.0600960135216462e-05, + "loss": 0.4796, + "step": 1485 + }, + { + "epoch": 2.384, + "grad_norm": 0.7887687683105469, + "learning_rate": 2.009445384637805e-05, + "loss": 0.4844, + "step": 1490 + }, + { + "epoch": 2.392, + "grad_norm": 0.8086990714073181, + "learning_rate": 1.9593555603886538e-05, + "loss": 0.5085, + "step": 1495 + }, + { + "epoch": 2.4, + "grad_norm": 0.6713303327560425, + "learning_rate": 1.9098300562505266e-05, + "loss": 0.4839, + "step": 1500 + }, + { + "epoch": 2.408, + "grad_norm": 0.6262741684913635, + "learning_rate": 1.8608723480938206e-05, + "loss": 0.5715, + "step": 1505 + }, + { + "epoch": 2.416, + "grad_norm": 0.8025808334350586, + "learning_rate": 1.812485871939056e-05, + "loss": 0.5266, + "step": 1510 + }, + { + "epoch": 2.424, + "grad_norm": 0.8753231167793274, + "learning_rate": 1.7646740237157256e-05, + "loss": 0.5422, + "step": 1515 + }, + { + "epoch": 2.432, + "grad_norm": 0.6459301710128784, + "learning_rate": 1.7174401590239587e-05, + "loss": 0.5553, + "step": 1520 + }, + { + "epoch": 2.44, + "grad_norm": 0.6917416453361511, + "learning_rate": 1.6707875928990058e-05, + "loss": 0.5765, + "step": 1525 + }, + { + "epoch": 2.448, + "grad_norm": 0.7890029549598694, + "learning_rate": 1.6247195995785837e-05, + "loss": 0.549, + "step": 1530 + }, + { + "epoch": 2.456, + "grad_norm": 0.9913660883903503, + "learning_rate": 1.579239412273078e-05, + "loss": 0.4876, + "step": 1535 + }, + { + "epoch": 2.464, + "grad_norm": 0.9030985832214355, + "learning_rate": 1.5343502229386207e-05, + "loss": 0.5546, + "step": 1540 + }, + { + "epoch": 2.472, + "grad_norm": 0.9133403301239014, + "learning_rate": 1.4900551820530828e-05, + "loss": 0.5356, + "step": 1545 + }, + { + "epoch": 2.48, + "grad_norm": 0.7083793878555298, + "learning_rate": 1.4463573983949341e-05, + "loss": 0.5142, + "step": 1550 + }, + { + "epoch": 2.488, + "grad_norm": 1.095435619354248, + "learning_rate": 1.40325993882509e-05, + "loss": 0.6054, + "step": 1555 + }, + { + "epoch": 2.496, + "grad_norm": 0.8825190663337708, + "learning_rate": 1.3607658280716473e-05, + "loss": 0.5294, + "step": 1560 + }, + { + "epoch": 2.504, + "grad_norm": 0.9436343908309937, + "learning_rate": 1.3188780485176088e-05, + "loss": 0.5294, + "step": 1565 + }, + { + "epoch": 2.512, + "grad_norm": 1.0125439167022705, + "learning_rate": 1.2775995399915631e-05, + "loss": 0.4905, + "step": 1570 + }, + { + "epoch": 2.52, + "grad_norm": 0.8476350903511047, + "learning_rate": 1.2369331995613665e-05, + "loss": 0.5186, + "step": 1575 + }, + { + "epoch": 2.528, + "grad_norm": 0.9092681407928467, + "learning_rate": 1.196881881330798e-05, + "loss": 0.4909, + "step": 1580 + }, + { + "epoch": 2.536, + "grad_norm": 0.7970360517501831, + "learning_rate": 1.1574483962392767e-05, + "loss": 0.5303, + "step": 1585 + }, + { + "epoch": 2.544, + "grad_norm": 0.8575041890144348, + "learning_rate": 1.1186355118645554e-05, + "loss": 0.5169, + "step": 1590 + }, + { + "epoch": 2.552, + "grad_norm": 0.7397408485412598, + "learning_rate": 1.0804459522284926e-05, + "loss": 0.5339, + "step": 1595 + }, + { + "epoch": 2.56, + "grad_norm": 0.7415968179702759, + "learning_rate": 1.042882397605871e-05, + "loss": 0.5283, + "step": 1600 + }, + { + "epoch": 2.568, + "grad_norm": 0.7035180926322937, + "learning_rate": 1.0059474843362892e-05, + "loss": 0.5576, + "step": 1605 + }, + { + "epoch": 2.576, + "grad_norm": 0.9805112481117249, + "learning_rate": 9.696438046391288e-06, + "loss": 0.5136, + "step": 1610 + }, + { + "epoch": 2.584, + "grad_norm": 0.6661838889122009, + "learning_rate": 9.339739064316233e-06, + "loss": 0.5885, + "step": 1615 + }, + { + "epoch": 2.592, + "grad_norm": 0.8581559062004089, + "learning_rate": 8.989402931500434e-06, + "loss": 0.5, + "step": 1620 + }, + { + "epoch": 2.6, + "grad_norm": 0.7146279811859131, + "learning_rate": 8.645454235739903e-06, + "loss": 0.5325, + "step": 1625 + }, + { + "epoch": 2.608, + "grad_norm": 0.9474234580993652, + "learning_rate": 8.307917116538378e-06, + "loss": 0.5772, + "step": 1630 + }, + { + "epoch": 2.616, + "grad_norm": 0.9583209753036499, + "learning_rate": 7.976815263412963e-06, + "loss": 0.5736, + "step": 1635 + }, + { + "epoch": 2.624, + "grad_norm": 0.7156705260276794, + "learning_rate": 7.652171914231776e-06, + "loss": 0.5199, + "step": 1640 + }, + { + "epoch": 2.632, + "grad_norm": 0.8224849700927734, + "learning_rate": 7.3340098535827905e-06, + "loss": 0.5753, + "step": 1645 + }, + { + "epoch": 2.64, + "grad_norm": 0.8689257502555847, + "learning_rate": 7.022351411174866e-06, + "loss": 0.5424, + "step": 1650 + }, + { + "epoch": 2.648, + "grad_norm": 0.6636053323745728, + "learning_rate": 6.717218460270536e-06, + "loss": 0.5555, + "step": 1655 + }, + { + "epoch": 2.656, + "grad_norm": 0.8688860535621643, + "learning_rate": 6.418632416150927e-06, + "loss": 0.4936, + "step": 1660 + }, + { + "epoch": 2.664, + "grad_norm": 0.6272854208946228, + "learning_rate": 6.126614234612593e-06, + "loss": 0.6291, + "step": 1665 + }, + { + "epoch": 2.672, + "grad_norm": 1.2240337133407593, + "learning_rate": 5.8411844104969916e-06, + "loss": 0.5197, + "step": 1670 + }, + { + "epoch": 2.68, + "grad_norm": 0.9820936918258667, + "learning_rate": 5.562362976251901e-06, + "loss": 0.5398, + "step": 1675 + }, + { + "epoch": 2.6879999999999997, + "grad_norm": 1.1582359075546265, + "learning_rate": 5.290169500525577e-06, + "loss": 0.6059, + "step": 1680 + }, + { + "epoch": 2.6959999999999997, + "grad_norm": 0.5501114726066589, + "learning_rate": 5.024623086793323e-06, + "loss": 0.531, + "step": 1685 + }, + { + "epoch": 2.7039999999999997, + "grad_norm": 0.8848717212677002, + "learning_rate": 4.765742372016735e-06, + "loss": 0.6054, + "step": 1690 + }, + { + "epoch": 2.7119999999999997, + "grad_norm": 0.7358693480491638, + "learning_rate": 4.513545525335705e-06, + "loss": 0.5173, + "step": 1695 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 0.9218215942382812, + "learning_rate": 4.268050246793276e-06, + "loss": 0.4944, + "step": 1700 + }, + { + "epoch": 2.7279999999999998, + "grad_norm": 0.6374716758728027, + "learning_rate": 4.029273766093333e-06, + "loss": 0.5183, + "step": 1705 + }, + { + "epoch": 2.7359999999999998, + "grad_norm": 0.583243191242218, + "learning_rate": 3.797232841391407e-06, + "loss": 0.668, + "step": 1710 + }, + { + "epoch": 2.7439999999999998, + "grad_norm": 0.8384690284729004, + "learning_rate": 3.5719437581185454e-06, + "loss": 0.5068, + "step": 1715 + }, + { + "epoch": 2.752, + "grad_norm": 0.8034130334854126, + "learning_rate": 3.3534223278382405e-06, + "loss": 0.5823, + "step": 1720 + }, + { + "epoch": 2.76, + "grad_norm": 0.8146041631698608, + "learning_rate": 3.1416838871368924e-06, + "loss": 0.6111, + "step": 1725 + }, + { + "epoch": 2.768, + "grad_norm": 0.8122982382774353, + "learning_rate": 2.936743296547273e-06, + "loss": 0.5231, + "step": 1730 + }, + { + "epoch": 2.776, + "grad_norm": 0.7326982021331787, + "learning_rate": 2.738614939505646e-06, + "loss": 0.5236, + "step": 1735 + }, + { + "epoch": 2.784, + "grad_norm": 0.7472147345542908, + "learning_rate": 2.5473127213422763e-06, + "loss": 0.5657, + "step": 1740 + }, + { + "epoch": 2.792, + "grad_norm": 0.8197700381278992, + "learning_rate": 2.3628500683055222e-06, + "loss": 0.5518, + "step": 1745 + }, + { + "epoch": 2.8, + "grad_norm": 0.8733732104301453, + "learning_rate": 2.1852399266194314e-06, + "loss": 0.4908, + "step": 1750 + }, + { + "epoch": 2.808, + "grad_norm": 0.8913092017173767, + "learning_rate": 2.014494761575314e-06, + "loss": 0.5459, + "step": 1755 + }, + { + "epoch": 2.816, + "grad_norm": 1.1259772777557373, + "learning_rate": 1.8506265566567094e-06, + "loss": 0.5208, + "step": 1760 + }, + { + "epoch": 2.824, + "grad_norm": 0.7692184448242188, + "learning_rate": 1.6936468126984572e-06, + "loss": 0.5824, + "step": 1765 + }, + { + "epoch": 2.832, + "grad_norm": 0.588602602481842, + "learning_rate": 1.543566547079467e-06, + "loss": 0.5512, + "step": 1770 + }, + { + "epoch": 2.84, + "grad_norm": 0.6324055790901184, + "learning_rate": 1.400396292949513e-06, + "loss": 0.6327, + "step": 1775 + }, + { + "epoch": 2.848, + "grad_norm": 0.7608378529548645, + "learning_rate": 1.26414609848996e-06, + "loss": 0.5292, + "step": 1780 + }, + { + "epoch": 2.856, + "grad_norm": 0.7972851395606995, + "learning_rate": 1.134825526208605e-06, + "loss": 0.5692, + "step": 1785 + }, + { + "epoch": 2.864, + "grad_norm": 0.9705446362495422, + "learning_rate": 1.0124436522684243e-06, + "loss": 0.5532, + "step": 1790 + }, + { + "epoch": 2.872, + "grad_norm": 0.6317399144172668, + "learning_rate": 8.970090658507291e-07, + "loss": 0.5314, + "step": 1795 + }, + { + "epoch": 2.88, + "grad_norm": 0.6457757949829102, + "learning_rate": 7.885298685522235e-07, + "loss": 0.524, + "step": 1800 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.127865302557655e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1800/training_args.bin b/checkpoint-1800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-1875/README.md b/checkpoint-1875/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-1875/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-1875/adapter_config.json b/checkpoint-1875/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-1875/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1875/adapter_model.safetensors b/checkpoint-1875/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8dccbd00f8b654b41b87d53e00ede0c2d39d4943 --- /dev/null +++ b/checkpoint-1875/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6ca1a286feb5f305d1377fb4b2b4ba22d9b9fc3346cd70c7a0b6f26c723ab9 +size 22573704 diff --git a/checkpoint-1875/optimizer.pt b/checkpoint-1875/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..efd03d94ee8db9b6cf67feff572e90f6452fb2f4 --- /dev/null +++ b/checkpoint-1875/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eaec017289341958e66903a06d4981ed9ffd65aac4d4dfdebf20554f1ecdb0c +size 45276986 diff --git a/checkpoint-1875/rng_state.pth b/checkpoint-1875/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/checkpoint-1875/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/checkpoint-1875/scheduler.pt b/checkpoint-1875/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bcc83046254482bec5329a31b22453caa9506df --- /dev/null +++ b/checkpoint-1875/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293fbb7e5ee488097b1b02e80a5587b368df9f7e036db0596e2bbfbbec8e7f45 +size 1064 diff --git a/checkpoint-1875/special_tokens_map.json b/checkpoint-1875/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-1875/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-1875/tokenizer.json b/checkpoint-1875/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1875/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1875/tokenizer_config.json b/checkpoint-1875/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-1875/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1875/trainer_state.json b/checkpoint-1875/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..20ac21ed0b8e3cd78f09abbc93a7d7b40822a71d --- /dev/null +++ b/checkpoint-1875/trainer_state.json @@ -0,0 +1,2658 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1875, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + }, + { + "epoch": 2.088, + "grad_norm": 0.640073299407959, + "learning_rate": 4.224272965777326e-05, + "loss": 0.4813, + "step": 1305 + }, + { + "epoch": 2.096, + "grad_norm": 0.7172432541847229, + "learning_rate": 4.15608696880828e-05, + "loss": 0.5315, + "step": 1310 + }, + { + "epoch": 2.104, + "grad_norm": 1.1283674240112305, + "learning_rate": 4.08831111777658e-05, + "loss": 0.5591, + "step": 1315 + }, + { + "epoch": 2.112, + "grad_norm": 0.8184736967086792, + "learning_rate": 4.020950169424815e-05, + "loss": 0.605, + "step": 1320 + }, + { + "epoch": 2.12, + "grad_norm": 0.6823618412017822, + "learning_rate": 3.954008851376252e-05, + "loss": 0.4955, + "step": 1325 + }, + { + "epoch": 2.128, + "grad_norm": 0.8576385378837585, + "learning_rate": 3.887491861803085e-05, + "loss": 0.5757, + "step": 1330 + }, + { + "epoch": 2.136, + "grad_norm": 0.967835009098053, + "learning_rate": 3.821403869096658e-05, + "loss": 0.5313, + "step": 1335 + }, + { + "epoch": 2.144, + "grad_norm": 0.7330173254013062, + "learning_rate": 3.755749511539845e-05, + "loss": 0.5904, + "step": 1340 + }, + { + "epoch": 2.152, + "grad_norm": 0.6664792895317078, + "learning_rate": 3.690533396981504e-05, + "loss": 0.4679, + "step": 1345 + }, + { + "epoch": 2.16, + "grad_norm": 0.7639065980911255, + "learning_rate": 3.6257601025131026e-05, + "loss": 0.5235, + "step": 1350 + }, + { + "epoch": 2.168, + "grad_norm": 0.6960520148277283, + "learning_rate": 3.561434174147463e-05, + "loss": 0.5797, + "step": 1355 + }, + { + "epoch": 2.176, + "grad_norm": 0.8231356143951416, + "learning_rate": 3.497560126499709e-05, + "loss": 0.5772, + "step": 1360 + }, + { + "epoch": 2.184, + "grad_norm": 0.8968437910079956, + "learning_rate": 3.4341424424704375e-05, + "loss": 0.5316, + "step": 1365 + }, + { + "epoch": 2.192, + "grad_norm": 0.8037480711936951, + "learning_rate": 3.371185572931048e-05, + "loss": 0.5646, + "step": 1370 + }, + { + "epoch": 2.2, + "grad_norm": 0.9330148696899414, + "learning_rate": 3.308693936411421e-05, + "loss": 0.5431, + "step": 1375 + }, + { + "epoch": 2.208, + "grad_norm": 0.6958775520324707, + "learning_rate": 3.246671918789755e-05, + "loss": 0.5403, + "step": 1380 + }, + { + "epoch": 2.216, + "grad_norm": 0.9870476126670837, + "learning_rate": 3.1851238729848034e-05, + "loss": 0.5329, + "step": 1385 + }, + { + "epoch": 2.224, + "grad_norm": 0.5036590099334717, + "learning_rate": 3.124054118650327e-05, + "loss": 0.5696, + "step": 1390 + }, + { + "epoch": 2.232, + "grad_norm": 0.8640053868293762, + "learning_rate": 3.063466941871952e-05, + "loss": 0.59, + "step": 1395 + }, + { + "epoch": 2.24, + "grad_norm": 0.6065173149108887, + "learning_rate": 3.0033665948663448e-05, + "loss": 0.5116, + "step": 1400 + }, + { + "epoch": 2.248, + "grad_norm": 1.083775520324707, + "learning_rate": 2.9437572956827964e-05, + "loss": 0.5783, + "step": 1405 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.7090497016906738, + "learning_rate": 2.8846432279071467e-05, + "loss": 0.6259, + "step": 1410 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.742468535900116, + "learning_rate": 2.826028540368215e-05, + "loss": 0.5759, + "step": 1415 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.9219839572906494, + "learning_rate": 2.7679173468465812e-05, + "loss": 0.497, + "step": 1420 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.7159206867218018, + "learning_rate": 2.7103137257858868e-05, + "loss": 0.619, + "step": 1425 + }, + { + "epoch": 2.288, + "grad_norm": 0.6997727751731873, + "learning_rate": 2.6532217200065858e-05, + "loss": 0.5858, + "step": 1430 + }, + { + "epoch": 2.296, + "grad_norm": 0.7493643164634705, + "learning_rate": 2.5966453364222186e-05, + "loss": 0.6291, + "step": 1435 + }, + { + "epoch": 2.304, + "grad_norm": 0.8311699032783508, + "learning_rate": 2.540588545758179e-05, + "loss": 0.6418, + "step": 1440 + }, + { + "epoch": 2.312, + "grad_norm": 0.7084354758262634, + "learning_rate": 2.48505528227304e-05, + "loss": 0.5483, + "step": 1445 + }, + { + "epoch": 2.32, + "grad_norm": 0.734438955783844, + "learning_rate": 2.4300494434824373e-05, + "loss": 0.6071, + "step": 1450 + }, + { + "epoch": 2.328, + "grad_norm": 0.8913635015487671, + "learning_rate": 2.37557488988552e-05, + "loss": 0.5099, + "step": 1455 + }, + { + "epoch": 2.336, + "grad_norm": 0.8349048495292664, + "learning_rate": 2.321635444694028e-05, + "loss": 0.5186, + "step": 1460 + }, + { + "epoch": 2.344, + "grad_norm": 0.6164011359214783, + "learning_rate": 2.2682348935639274e-05, + "loss": 0.5043, + "step": 1465 + }, + { + "epoch": 2.352, + "grad_norm": 1.044892430305481, + "learning_rate": 2.2153769843297667e-05, + "loss": 0.61, + "step": 1470 + }, + { + "epoch": 2.36, + "grad_norm": 0.9142879247665405, + "learning_rate": 2.163065426741603e-05, + "loss": 0.5987, + "step": 1475 + }, + { + "epoch": 2.368, + "grad_norm": 0.6232836842536926, + "learning_rate": 2.1113038922046602e-05, + "loss": 0.5212, + "step": 1480 + }, + { + "epoch": 2.376, + "grad_norm": 0.49558231234550476, + "learning_rate": 2.0600960135216462e-05, + "loss": 0.4796, + "step": 1485 + }, + { + "epoch": 2.384, + "grad_norm": 0.7887687683105469, + "learning_rate": 2.009445384637805e-05, + "loss": 0.4844, + "step": 1490 + }, + { + "epoch": 2.392, + "grad_norm": 0.8086990714073181, + "learning_rate": 1.9593555603886538e-05, + "loss": 0.5085, + "step": 1495 + }, + { + "epoch": 2.4, + "grad_norm": 0.6713303327560425, + "learning_rate": 1.9098300562505266e-05, + "loss": 0.4839, + "step": 1500 + }, + { + "epoch": 2.408, + "grad_norm": 0.6262741684913635, + "learning_rate": 1.8608723480938206e-05, + "loss": 0.5715, + "step": 1505 + }, + { + "epoch": 2.416, + "grad_norm": 0.8025808334350586, + "learning_rate": 1.812485871939056e-05, + "loss": 0.5266, + "step": 1510 + }, + { + "epoch": 2.424, + "grad_norm": 0.8753231167793274, + "learning_rate": 1.7646740237157256e-05, + "loss": 0.5422, + "step": 1515 + }, + { + "epoch": 2.432, + "grad_norm": 0.6459301710128784, + "learning_rate": 1.7174401590239587e-05, + "loss": 0.5553, + "step": 1520 + }, + { + "epoch": 2.44, + "grad_norm": 0.6917416453361511, + "learning_rate": 1.6707875928990058e-05, + "loss": 0.5765, + "step": 1525 + }, + { + "epoch": 2.448, + "grad_norm": 0.7890029549598694, + "learning_rate": 1.6247195995785837e-05, + "loss": 0.549, + "step": 1530 + }, + { + "epoch": 2.456, + "grad_norm": 0.9913660883903503, + "learning_rate": 1.579239412273078e-05, + "loss": 0.4876, + "step": 1535 + }, + { + "epoch": 2.464, + "grad_norm": 0.9030985832214355, + "learning_rate": 1.5343502229386207e-05, + "loss": 0.5546, + "step": 1540 + }, + { + "epoch": 2.472, + "grad_norm": 0.9133403301239014, + "learning_rate": 1.4900551820530828e-05, + "loss": 0.5356, + "step": 1545 + }, + { + "epoch": 2.48, + "grad_norm": 0.7083793878555298, + "learning_rate": 1.4463573983949341e-05, + "loss": 0.5142, + "step": 1550 + }, + { + "epoch": 2.488, + "grad_norm": 1.095435619354248, + "learning_rate": 1.40325993882509e-05, + "loss": 0.6054, + "step": 1555 + }, + { + "epoch": 2.496, + "grad_norm": 0.8825190663337708, + "learning_rate": 1.3607658280716473e-05, + "loss": 0.5294, + "step": 1560 + }, + { + "epoch": 2.504, + "grad_norm": 0.9436343908309937, + "learning_rate": 1.3188780485176088e-05, + "loss": 0.5294, + "step": 1565 + }, + { + "epoch": 2.512, + "grad_norm": 1.0125439167022705, + "learning_rate": 1.2775995399915631e-05, + "loss": 0.4905, + "step": 1570 + }, + { + "epoch": 2.52, + "grad_norm": 0.8476350903511047, + "learning_rate": 1.2369331995613665e-05, + "loss": 0.5186, + "step": 1575 + }, + { + "epoch": 2.528, + "grad_norm": 0.9092681407928467, + "learning_rate": 1.196881881330798e-05, + "loss": 0.4909, + "step": 1580 + }, + { + "epoch": 2.536, + "grad_norm": 0.7970360517501831, + "learning_rate": 1.1574483962392767e-05, + "loss": 0.5303, + "step": 1585 + }, + { + "epoch": 2.544, + "grad_norm": 0.8575041890144348, + "learning_rate": 1.1186355118645554e-05, + "loss": 0.5169, + "step": 1590 + }, + { + "epoch": 2.552, + "grad_norm": 0.7397408485412598, + "learning_rate": 1.0804459522284926e-05, + "loss": 0.5339, + "step": 1595 + }, + { + "epoch": 2.56, + "grad_norm": 0.7415968179702759, + "learning_rate": 1.042882397605871e-05, + "loss": 0.5283, + "step": 1600 + }, + { + "epoch": 2.568, + "grad_norm": 0.7035180926322937, + "learning_rate": 1.0059474843362892e-05, + "loss": 0.5576, + "step": 1605 + }, + { + "epoch": 2.576, + "grad_norm": 0.9805112481117249, + "learning_rate": 9.696438046391288e-06, + "loss": 0.5136, + "step": 1610 + }, + { + "epoch": 2.584, + "grad_norm": 0.6661838889122009, + "learning_rate": 9.339739064316233e-06, + "loss": 0.5885, + "step": 1615 + }, + { + "epoch": 2.592, + "grad_norm": 0.8581559062004089, + "learning_rate": 8.989402931500434e-06, + "loss": 0.5, + "step": 1620 + }, + { + "epoch": 2.6, + "grad_norm": 0.7146279811859131, + "learning_rate": 8.645454235739903e-06, + "loss": 0.5325, + "step": 1625 + }, + { + "epoch": 2.608, + "grad_norm": 0.9474234580993652, + "learning_rate": 8.307917116538378e-06, + "loss": 0.5772, + "step": 1630 + }, + { + "epoch": 2.616, + "grad_norm": 0.9583209753036499, + "learning_rate": 7.976815263412963e-06, + "loss": 0.5736, + "step": 1635 + }, + { + "epoch": 2.624, + "grad_norm": 0.7156705260276794, + "learning_rate": 7.652171914231776e-06, + "loss": 0.5199, + "step": 1640 + }, + { + "epoch": 2.632, + "grad_norm": 0.8224849700927734, + "learning_rate": 7.3340098535827905e-06, + "loss": 0.5753, + "step": 1645 + }, + { + "epoch": 2.64, + "grad_norm": 0.8689257502555847, + "learning_rate": 7.022351411174866e-06, + "loss": 0.5424, + "step": 1650 + }, + { + "epoch": 2.648, + "grad_norm": 0.6636053323745728, + "learning_rate": 6.717218460270536e-06, + "loss": 0.5555, + "step": 1655 + }, + { + "epoch": 2.656, + "grad_norm": 0.8688860535621643, + "learning_rate": 6.418632416150927e-06, + "loss": 0.4936, + "step": 1660 + }, + { + "epoch": 2.664, + "grad_norm": 0.6272854208946228, + "learning_rate": 6.126614234612593e-06, + "loss": 0.6291, + "step": 1665 + }, + { + "epoch": 2.672, + "grad_norm": 1.2240337133407593, + "learning_rate": 5.8411844104969916e-06, + "loss": 0.5197, + "step": 1670 + }, + { + "epoch": 2.68, + "grad_norm": 0.9820936918258667, + "learning_rate": 5.562362976251901e-06, + "loss": 0.5398, + "step": 1675 + }, + { + "epoch": 2.6879999999999997, + "grad_norm": 1.1582359075546265, + "learning_rate": 5.290169500525577e-06, + "loss": 0.6059, + "step": 1680 + }, + { + "epoch": 2.6959999999999997, + "grad_norm": 0.5501114726066589, + "learning_rate": 5.024623086793323e-06, + "loss": 0.531, + "step": 1685 + }, + { + "epoch": 2.7039999999999997, + "grad_norm": 0.8848717212677002, + "learning_rate": 4.765742372016735e-06, + "loss": 0.6054, + "step": 1690 + }, + { + "epoch": 2.7119999999999997, + "grad_norm": 0.7358693480491638, + "learning_rate": 4.513545525335705e-06, + "loss": 0.5173, + "step": 1695 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 0.9218215942382812, + "learning_rate": 4.268050246793276e-06, + "loss": 0.4944, + "step": 1700 + }, + { + "epoch": 2.7279999999999998, + "grad_norm": 0.6374716758728027, + "learning_rate": 4.029273766093333e-06, + "loss": 0.5183, + "step": 1705 + }, + { + "epoch": 2.7359999999999998, + "grad_norm": 0.583243191242218, + "learning_rate": 3.797232841391407e-06, + "loss": 0.668, + "step": 1710 + }, + { + "epoch": 2.7439999999999998, + "grad_norm": 0.8384690284729004, + "learning_rate": 3.5719437581185454e-06, + "loss": 0.5068, + "step": 1715 + }, + { + "epoch": 2.752, + "grad_norm": 0.8034130334854126, + "learning_rate": 3.3534223278382405e-06, + "loss": 0.5823, + "step": 1720 + }, + { + "epoch": 2.76, + "grad_norm": 0.8146041631698608, + "learning_rate": 3.1416838871368924e-06, + "loss": 0.6111, + "step": 1725 + }, + { + "epoch": 2.768, + "grad_norm": 0.8122982382774353, + "learning_rate": 2.936743296547273e-06, + "loss": 0.5231, + "step": 1730 + }, + { + "epoch": 2.776, + "grad_norm": 0.7326982021331787, + "learning_rate": 2.738614939505646e-06, + "loss": 0.5236, + "step": 1735 + }, + { + "epoch": 2.784, + "grad_norm": 0.7472147345542908, + "learning_rate": 2.5473127213422763e-06, + "loss": 0.5657, + "step": 1740 + }, + { + "epoch": 2.792, + "grad_norm": 0.8197700381278992, + "learning_rate": 2.3628500683055222e-06, + "loss": 0.5518, + "step": 1745 + }, + { + "epoch": 2.8, + "grad_norm": 0.8733732104301453, + "learning_rate": 2.1852399266194314e-06, + "loss": 0.4908, + "step": 1750 + }, + { + "epoch": 2.808, + "grad_norm": 0.8913092017173767, + "learning_rate": 2.014494761575314e-06, + "loss": 0.5459, + "step": 1755 + }, + { + "epoch": 2.816, + "grad_norm": 1.1259772777557373, + "learning_rate": 1.8506265566567094e-06, + "loss": 0.5208, + "step": 1760 + }, + { + "epoch": 2.824, + "grad_norm": 0.7692184448242188, + "learning_rate": 1.6936468126984572e-06, + "loss": 0.5824, + "step": 1765 + }, + { + "epoch": 2.832, + "grad_norm": 0.588602602481842, + "learning_rate": 1.543566547079467e-06, + "loss": 0.5512, + "step": 1770 + }, + { + "epoch": 2.84, + "grad_norm": 0.6324055790901184, + "learning_rate": 1.400396292949513e-06, + "loss": 0.6327, + "step": 1775 + }, + { + "epoch": 2.848, + "grad_norm": 0.7608378529548645, + "learning_rate": 1.26414609848996e-06, + "loss": 0.5292, + "step": 1780 + }, + { + "epoch": 2.856, + "grad_norm": 0.7972851395606995, + "learning_rate": 1.134825526208605e-06, + "loss": 0.5692, + "step": 1785 + }, + { + "epoch": 2.864, + "grad_norm": 0.9705446362495422, + "learning_rate": 1.0124436522684243e-06, + "loss": 0.5532, + "step": 1790 + }, + { + "epoch": 2.872, + "grad_norm": 0.6317399144172668, + "learning_rate": 8.970090658507291e-07, + "loss": 0.5314, + "step": 1795 + }, + { + "epoch": 2.88, + "grad_norm": 0.6457757949829102, + "learning_rate": 7.885298685522235e-07, + "loss": 0.524, + "step": 1800 + }, + { + "epoch": 2.888, + "grad_norm": 0.8593656420707703, + "learning_rate": 6.870136738164612e-07, + "loss": 0.5227, + "step": 1805 + }, + { + "epoch": 2.896, + "grad_norm": 1.0187020301818848, + "learning_rate": 5.924676063995382e-07, + "loss": 0.5993, + "step": 1810 + }, + { + "epoch": 2.904, + "grad_norm": 0.7082214951515198, + "learning_rate": 5.048983018699827e-07, + "loss": 0.5618, + "step": 1815 + }, + { + "epoch": 2.912, + "grad_norm": 0.6521438956260681, + "learning_rate": 4.2431190614309335e-07, + "loss": 0.5504, + "step": 1820 + }, + { + "epoch": 2.92, + "grad_norm": 0.8906036615371704, + "learning_rate": 3.50714075049563e-07, + "loss": 0.5147, + "step": 1825 + }, + { + "epoch": 2.928, + "grad_norm": 1.0908008813858032, + "learning_rate": 2.841099739386066e-07, + "loss": 0.5564, + "step": 1830 + }, + { + "epoch": 2.936, + "grad_norm": 0.6374122500419617, + "learning_rate": 2.2450427731534053e-07, + "loss": 0.5188, + "step": 1835 + }, + { + "epoch": 2.944, + "grad_norm": 0.9616740345954895, + "learning_rate": 1.7190116851280026e-07, + "loss": 0.5438, + "step": 1840 + }, + { + "epoch": 2.952, + "grad_norm": 1.0712924003601074, + "learning_rate": 1.2630433939825327e-07, + "loss": 0.4962, + "step": 1845 + }, + { + "epoch": 2.96, + "grad_norm": 0.8226613998413086, + "learning_rate": 8.771699011416168e-08, + "loss": 0.5021, + "step": 1850 + }, + { + "epoch": 2.968, + "grad_norm": 0.9519492983818054, + "learning_rate": 5.6141828853573106e-08, + "loss": 0.5277, + "step": 1855 + }, + { + "epoch": 2.976, + "grad_norm": 0.9817518591880798, + "learning_rate": 3.1581071670006015e-08, + "loss": 0.5764, + "step": 1860 + }, + { + "epoch": 2.984, + "grad_norm": 0.7039242386817932, + "learning_rate": 1.4036442321962995e-08, + "loss": 0.5408, + "step": 1865 + }, + { + "epoch": 2.992, + "grad_norm": 0.591012179851532, + "learning_rate": 3.509172151938689e-09, + "loss": 0.5014, + "step": 1870 + }, + { + "epoch": 3.0, + "grad_norm": 0.7672661542892456, + "learning_rate": 0.0, + "loss": 0.6182, + "step": 1875 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1764404625814323e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1875/training_args.bin b/checkpoint-1875/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-1875/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f1c797611eceae0cd2fcdbab57546689d78f9bf --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539600392265b6d89a8d665d509d929b2600a637fbeb02dea86f50d822af90eb +size 22573704 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8fff132dcd511dc14f780fb1cc6753bd90319a7 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e70a973497780780e3ea4b355121e910128937b09a5ab6b36c75b0c413bdc3 +size 45276986 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e01e9dad217e7c9f2c3104afe13d3b3a9d02e67 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad6c3134999faf80423953251450284daa031eb6b358fb44f5b77a376ba8c7c +size 1064 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-200/tokenizer.json b/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..877104e06ebe9ccdd53c4fe2a6d2f17918614b72 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,313 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.32, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2337729827766272e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a9e164f7848d3ceeb7a8574e9a4d2fc97a42956 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66546909729f19b1ecbfea54e9dc990d8916c4593340728298ae3728eb682778 +size 22573704 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e112dc3094acfea288118f82c040800b913758b --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6129d7cf76fdba16b64d9efe52e484780cd067c6e5bbd81d8611e34f61be3c12 +size 45276986 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fcf445dfe81ee88a19395f4d600f06654fb4dbef --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7efc5b9509020acd538e103b4c206b5094c7f6f88bb022a2dc967282f3f504a5 +size 1064 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-300/tokenizer.json b/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..569f9e85fe079eeacf7ad864906c3e04372799c3 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,453 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.48, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8927173541298176e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..429fb11aa31bd07de848e9f56e0ef20123fd6663 --- /dev/null +++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ce1932001edf4fdf8680f1b1c8dda73ede7836d4da6e2c42588a1c71654b5e +size 22573704 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1909aaece302d808d0538ec72239d5c19321d0cc --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea5efe7c2cb9b18b562567f4c377b7bed9112b47294674d824efb5cca688a27 +size 45276986 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..802339c5d72d86401d83baffba15c3e88fddcca4 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb37142613edd3d55d2b3a28992c79903ad20ad05e9a18470c93ae5fb7cbcd9 +size 1064 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-400/tokenizer.json b/checkpoint-400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..28971800b645de7eeb668153e09a5f361834e40c --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,593 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.64, + "eval_steps": 500, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.4987417120866304e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d7fa67eb8b7f773aa75d91026ab6928cf6001d3 --- /dev/null +++ b/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f602b09099fd49d502876cfed4e02ede39b72ee4dce9a08c723b8062b67a9aec +size 22573704 diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e188800bf8d4996650ca682fcad8ebe413d91b03 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c8167a3ab334ee07b914bbeafe099227d0698c8f0ecbd6e547ffa66ae10d3e8 +size 45276986 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..630cc0c742bc0d197c96187fb383a7c65d4a16af --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d1ce3e9d1290a7de2c506ddfff436767e5820ac449f30914f4d4a7a64a91a4 +size 1064 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..61683404ae3d84bd58b27966f6d6588be4e3f53d --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,733 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.1643762555879424e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.safetensors b/checkpoint-600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68bf7e3144656bf29d1756f047afa29aa5fa540b --- /dev/null +++ b/checkpoint-600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90ad3fd3f6d9166b75049f2923dec2d7a41c604fc09d7dbc458ce454c0ae6a3 +size 22573704 diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ecc89149e6cf98f565294d2e942e8fce44d74fc --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35113237cc4f677cce4d3e7035cd488cb659b7941c9e437ec4cf6188004a734a +size 45276986 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6667fdfb7a029b2c5ae465c109530e3a6d5434c7 --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd83495c5da896776fc4a926c71c29ac7d3e1a4836a52655fefe41adedeec40d +size 1064 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-600/tokenizer.json b/checkpoint-600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..804935ed7f99e2b5e04e87c78796a1b6242fbd9a --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,873 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.96, + "eval_steps": 500, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.749352880943923e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.safetensors b/checkpoint-700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1dc151de11ef5bfdbb526f0f2bc350bd676cc053 --- /dev/null +++ b/checkpoint-700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a23aa58d1133b56f2343cecfc1fdc4f51accb88c723c309d15db0158d28d7870 +size 22573704 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad543edf6892ffe3572f3918079d7b69b63ac72f --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881c50450bc8ab09626757372688e7328d081b7614cc5dc54149bad94fe84d1e +size 45276986 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb8beef15c9e90c10dc2d8c3a573ec157ff9b93f --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0efae093702a8aec8960e4abc7014cf4c16fd4059e4e2f122e7a528ba18078 +size 1064 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-700/tokenizer.json b/checkpoint-700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b3a03812acbdeecf2f214bc34cc8d8c4b127da72 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,1013 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.12, + "eval_steps": 500, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.320122286794342e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-800/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-800/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-800/adapter_model.safetensors b/checkpoint-800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..852ebb0b1304796b430cc548f7d1159a0c9ff3a4 --- /dev/null +++ b/checkpoint-800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8600ec44a0efc32cf5c719075638ea8ff944a881acdedabdf6ddb6f04160f517 +size 22573704 diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..571ba4eb6c1a04b58b4ae1f3038553d043162d02 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad3cf72a3adc57837cbe4c2b468f6c4339eb1c93554a66c3b1ff5c745f293174 +size 45276986 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1de02cc10c419098078f64682645763ad1054bbd --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57aea9202da80532acfcd9243ff68caac5326250cc2e03647d8eee08cca191c9 +size 1064 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-800/tokenizer.json b/checkpoint-800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e8f150ace31f5ff7823fe80694faa04a8bca0c9a --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,1153 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.28, + "eval_steps": 500, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.969068985162138e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31d07dbe00996b0568fd0eb91d05ac9a95722459 --- /dev/null +++ b/checkpoint-900/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.2-1B +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73202e5c7ba7132c890e52da7442b36a9efff4dd --- /dev/null +++ b/checkpoint-900/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-900/adapter_model.safetensors b/checkpoint-900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc454c3b10c17b2e5a9e746185cf223f98c0a05f --- /dev/null +++ b/checkpoint-900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245e5b414a2b1168e45b8b832a3a851bc0ffd4f8bbf6717d77afa76a2e563449 +size 22573704 diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..db68ad93dd1fe6a6a8a0d74734e10725911657f4 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f18de82d5d8207f45c8c2c55e1cb81b90b7a17d998dd87c091cf410579097b +size 45276986 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddf64790e3baf171fba9c07fc38b3192828bff7d --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac0ce7603ccc0b8731e0616e37e36d0ee140ae326f3ed4de11624a836b9b56b +size 1064 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-900/tokenizer.json b/checkpoint-900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..59a009d30200dcd643f507f42437d3093b09b51a --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,1293 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.44, + "eval_steps": 500, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.645753588278886e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/llamaboard_config.yaml b/llamaboard_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0bf5a4170417056674de5ecab1cdc8de81f0d4ee --- /dev/null +++ b/llamaboard_config.yaml @@ -0,0 +1,72 @@ +top.booster: auto +top.checkpoint_path: [] +top.finetuning_type: lora +top.model_name: Llama-3.2-1B +top.quantization_bit: none +top.quantization_method: bitsandbytes +top.rope_scaling: linear +top.template: default +train.additional_target: '' +train.badam_mode: layer +train.badam_switch_interval: 50 +train.badam_switch_mode: ascending +train.badam_update_ratio: 0.05 +train.batch_size: 16 +train.compute_type: bf16 +train.create_new_adapter: false +train.cutoff_len: 1024 +train.dataset: +- mathinstruct +train.dataset_dir: data +train.ds_offload: false +train.ds_stage: none +train.extra_args: '{"optim": "adamw_torch"}' +train.freeze_extra_modules: '' +train.freeze_trainable_layers: 2 +train.freeze_trainable_modules: all +train.galore_rank: 16 +train.galore_scale: 0.25 +train.galore_target: all +train.galore_update_interval: 200 +train.gradient_accumulation_steps: 1 +train.learning_rate: 2e-4 +train.logging_steps: 5 +train.lora_alpha: 16 +train.lora_dropout: 0 +train.lora_rank: 8 +train.lora_target: '' +train.loraplus_lr_ratio: 0 +train.lr_scheduler_type: cosine +train.mask_history: false +train.max_grad_norm: '1.0' +train.max_samples: '10000' +train.neat_packing: false +train.neftune_alpha: 0 +train.num_train_epochs: '3.0' +train.packing: false +train.ppo_score_norm: false +train.ppo_whiten_rewards: false +train.pref_beta: 0.1 +train.pref_ftx: 0 +train.pref_loss: sigmoid +train.report_to: false +train.resize_vocab: false +train.reward_model: null +train.save_steps: 100 +train.shift_attn: false +train.swanlab_api_key: '' +train.swanlab_mode: cloud +train.swanlab_project: llamafactory +train.swanlab_run_name: '' +train.swanlab_workspace: '' +train.train_on_prompt: false +train.training_stage: Supervised Fine-Tuning +train.use_badam: false +train.use_dora: false +train.use_galore: false +train.use_llama_pro: false +train.use_pissa: false +train.use_rslora: false +train.use_swanlab: false +train.val_size: 0 +train.warmup_steps: 0 diff --git a/running_log.txt b/running_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a7e928a9f8adc460388591ee147d12bbdd4f933d --- /dev/null +++ b/running_log.txt @@ -0,0 +1,1874 @@ +[INFO|2024-12-28 20:32:56] parser.py:355 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16 + +[INFO|2024-12-28 20:32:56] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:32:56] configuration_utils.py:746 >> Model config LlamaConfig { + "_name_or_path": "meta-llama/Llama-3.2-1B", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:32:56] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/tokenizer.json + +[INFO|2024-12-28 20:32:56] tokenization_utils_base.py:2211 >> loading file tokenizer.model from cache at None + +[INFO|2024-12-28 20:32:56] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None + +[INFO|2024-12-28 20:32:56] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/special_tokens_map.json + +[INFO|2024-12-28 20:32:56] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/tokenizer_config.json + +[INFO|2024-12-28 20:32:57] tokenization_utils_base.py:2475 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + +[INFO|2024-12-28 20:32:57] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:32:57] configuration_utils.py:746 >> Model config LlamaConfig { + "_name_or_path": "meta-llama/Llama-3.2-1B", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:32:57] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/tokenizer.json + +[INFO|2024-12-28 20:32:57] tokenization_utils_base.py:2211 >> loading file tokenizer.model from cache at None + +[INFO|2024-12-28 20:32:57] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None + +[INFO|2024-12-28 20:32:57] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/special_tokens_map.json + +[INFO|2024-12-28 20:32:57] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/tokenizer_config.json + +[INFO|2024-12-28 20:32:58] tokenization_utils_base.py:2475 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + +[INFO|2024-12-28 20:32:58] logging.py:157 >> Add pad token: <|end_of_text|> + +[INFO|2024-12-28 20:32:58] logging.py:157 >> Loading dataset TIGER-Lab/MathInstruct... + +[INFO|2024-12-28 20:33:03] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:33:03] configuration_utils.py:746 >> Model config LlamaConfig { + "_name_or_path": "meta-llama/Llama-3.2-1B", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[WARNING|2024-12-28 20:33:03] logging.py:162 >> Input length is smaller than max length. Consider increase input length. + +[INFO|2024-12-28 20:33:03] logging.py:157 >> Using linear scaling strategy and setting scaling factor to 1.0 + +[INFO|2024-12-28 20:33:03] modeling_utils.py:3937 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/model.safetensors + +[INFO|2024-12-28 20:33:03] modeling_utils.py:1670 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. + +[INFO|2024-12-28 20:33:03] configuration_utils.py:1096 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001 +} + + +[INFO|2024-12-28 20:33:04] modeling_utils.py:4800 >> All model checkpoint weights were used when initializing LlamaForCausalLM. + + +[INFO|2024-12-28 20:33:04] modeling_utils.py:4808 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at meta-llama/Llama-3.2-1B. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. + +[INFO|2024-12-28 20:33:04] configuration_utils.py:1051 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/generation_config.json + +[INFO|2024-12-28 20:33:04] configuration_utils.py:1096 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "temperature": 0.6, + "top_p": 0.9 +} + + +[INFO|2024-12-28 20:33:04] logging.py:157 >> Gradient checkpointing enabled. + +[INFO|2024-12-28 20:33:04] logging.py:157 >> Using torch SDPA for faster training and inference. + +[INFO|2024-12-28 20:33:04] logging.py:157 >> Upcasting trainable params to float32. + +[INFO|2024-12-28 20:33:04] logging.py:157 >> Fine-tuning method: LoRA + +[INFO|2024-12-28 20:33:04] logging.py:157 >> Found linear modules: o_proj,down_proj,k_proj,gate_proj,up_proj,v_proj,q_proj + +[INFO|2024-12-28 20:33:05] logging.py:157 >> trainable params: 5,636,096 || all params: 1,241,450,496 || trainable%: 0.4540 + +[INFO|2024-12-28 20:33:05] trainer.py:698 >> Using auto half precision backend + +[INFO|2024-12-28 20:33:05] trainer.py:2313 >> ***** Running training ***** + +[INFO|2024-12-28 20:33:05] trainer.py:2314 >> Num examples = 10,000 + +[INFO|2024-12-28 20:33:05] trainer.py:2315 >> Num Epochs = 3 + +[INFO|2024-12-28 20:33:05] trainer.py:2316 >> Instantaneous batch size per device = 16 + +[INFO|2024-12-28 20:33:05] trainer.py:2319 >> Total train batch size (w. parallel, distributed & accumulation) = 16 + +[INFO|2024-12-28 20:33:05] trainer.py:2320 >> Gradient Accumulation steps = 1 + +[INFO|2024-12-28 20:33:05] trainer.py:2321 >> Total optimization steps = 1,875 + +[INFO|2024-12-28 20:33:05] trainer.py:2322 >> Number of trainable parameters = 5,636,096 + +[INFO|2024-12-28 20:33:11] logging.py:157 >> {'loss': 1.2049, 'learning_rate': 2.0000e-04, 'epoch': 0.01} + +[INFO|2024-12-28 20:33:18] logging.py:157 >> {'loss': 0.9333, 'learning_rate': 1.9999e-04, 'epoch': 0.02} + +[INFO|2024-12-28 20:33:25] logging.py:157 >> {'loss': 0.8671, 'learning_rate': 1.9997e-04, 'epoch': 0.02} + +[INFO|2024-12-28 20:33:31] logging.py:157 >> {'loss': 0.7979, 'learning_rate': 1.9994e-04, 'epoch': 0.03} + +[INFO|2024-12-28 20:33:36] logging.py:157 >> {'loss': 0.7662, 'learning_rate': 1.9991e-04, 'epoch': 0.04} + +[INFO|2024-12-28 20:33:43] logging.py:157 >> {'loss': 0.7929, 'learning_rate': 1.9987e-04, 'epoch': 0.05} + +[INFO|2024-12-28 20:33:49] logging.py:157 >> {'loss': 0.7683, 'learning_rate': 1.9983e-04, 'epoch': 0.06} + +[INFO|2024-12-28 20:33:56] logging.py:157 >> {'loss': 0.8667, 'learning_rate': 1.9978e-04, 'epoch': 0.06} + +[INFO|2024-12-28 20:34:04] logging.py:157 >> {'loss': 0.8446, 'learning_rate': 1.9972e-04, 'epoch': 0.07} + +[INFO|2024-12-28 20:34:09] logging.py:157 >> {'loss': 0.9051, 'learning_rate': 1.9965e-04, 'epoch': 0.08} + +[INFO|2024-12-28 20:34:14] logging.py:157 >> {'loss': 0.7235, 'learning_rate': 1.9958e-04, 'epoch': 0.09} + +[INFO|2024-12-28 20:34:18] logging.py:157 >> {'loss': 0.8169, 'learning_rate': 1.9950e-04, 'epoch': 0.10} + +[INFO|2024-12-28 20:34:24] logging.py:157 >> {'loss': 0.8266, 'learning_rate': 1.9941e-04, 'epoch': 0.10} + +[INFO|2024-12-28 20:34:29] logging.py:157 >> {'loss': 0.7580, 'learning_rate': 1.9931e-04, 'epoch': 0.11} + +[INFO|2024-12-28 20:34:36] logging.py:157 >> {'loss': 0.7759, 'learning_rate': 1.9921e-04, 'epoch': 0.12} + +[INFO|2024-12-28 20:34:42] logging.py:157 >> {'loss': 0.7797, 'learning_rate': 1.9910e-04, 'epoch': 0.13} + +[INFO|2024-12-28 20:34:49] logging.py:157 >> {'loss': 0.7437, 'learning_rate': 1.9899e-04, 'epoch': 0.14} + +[INFO|2024-12-28 20:34:56] logging.py:157 >> {'loss': 0.8043, 'learning_rate': 1.9887e-04, 'epoch': 0.14} + +[INFO|2024-12-28 20:35:02] logging.py:157 >> {'loss': 0.7701, 'learning_rate': 1.9874e-04, 'epoch': 0.15} + +[INFO|2024-12-28 20:35:10] logging.py:157 >> {'loss': 0.7090, 'learning_rate': 1.9860e-04, 'epoch': 0.16} + +[INFO|2024-12-28 20:35:10] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-100 + +[INFO|2024-12-28 20:35:10] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:35:10] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:35:10] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-100/tokenizer_config.json + +[INFO|2024-12-28 20:35:10] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-100/special_tokens_map.json + +[INFO|2024-12-28 20:35:17] logging.py:157 >> {'loss': 0.7377, 'learning_rate': 1.9846e-04, 'epoch': 0.17} + +[INFO|2024-12-28 20:35:23] logging.py:157 >> {'loss': 0.8352, 'learning_rate': 1.9831e-04, 'epoch': 0.18} + +[INFO|2024-12-28 20:35:30] logging.py:157 >> {'loss': 0.7738, 'learning_rate': 1.9815e-04, 'epoch': 0.18} + +[INFO|2024-12-28 20:35:35] logging.py:157 >> {'loss': 0.8067, 'learning_rate': 1.9799e-04, 'epoch': 0.19} + +[INFO|2024-12-28 20:35:41] logging.py:157 >> {'loss': 0.7456, 'learning_rate': 1.9781e-04, 'epoch': 0.20} + +[INFO|2024-12-28 20:35:48] logging.py:157 >> {'loss': 0.7580, 'learning_rate': 1.9764e-04, 'epoch': 0.21} + +[INFO|2024-12-28 20:35:54] logging.py:157 >> {'loss': 0.7895, 'learning_rate': 1.9745e-04, 'epoch': 0.22} + +[INFO|2024-12-28 20:36:02] logging.py:157 >> {'loss': 0.7302, 'learning_rate': 1.9726e-04, 'epoch': 0.22} + +[INFO|2024-12-28 20:36:07] logging.py:157 >> {'loss': 0.8152, 'learning_rate': 1.9706e-04, 'epoch': 0.23} + +[INFO|2024-12-28 20:36:12] logging.py:157 >> {'loss': 0.8461, 'learning_rate': 1.9686e-04, 'epoch': 0.24} + +[INFO|2024-12-28 20:36:18] logging.py:157 >> {'loss': 0.7787, 'learning_rate': 1.9665e-04, 'epoch': 0.25} + +[INFO|2024-12-28 20:36:24] logging.py:157 >> {'loss': 0.7574, 'learning_rate': 1.9643e-04, 'epoch': 0.26} + +[INFO|2024-12-28 20:36:28] logging.py:157 >> {'loss': 0.8487, 'learning_rate': 1.9620e-04, 'epoch': 0.26} + +[INFO|2024-12-28 20:36:34] logging.py:157 >> {'loss': 0.6611, 'learning_rate': 1.9597e-04, 'epoch': 0.27} + +[INFO|2024-12-28 20:36:41] logging.py:157 >> {'loss': 0.7802, 'learning_rate': 1.9573e-04, 'epoch': 0.28} + +[INFO|2024-12-28 20:36:48] logging.py:157 >> {'loss': 0.6727, 'learning_rate': 1.9549e-04, 'epoch': 0.29} + +[INFO|2024-12-28 20:36:53] logging.py:157 >> {'loss': 0.7502, 'learning_rate': 1.9523e-04, 'epoch': 0.30} + +[INFO|2024-12-28 20:36:59] logging.py:157 >> {'loss': 0.8401, 'learning_rate': 1.9498e-04, 'epoch': 0.30} + +[INFO|2024-12-28 20:37:04] logging.py:157 >> {'loss': 0.7494, 'learning_rate': 1.9471e-04, 'epoch': 0.31} + +[INFO|2024-12-28 20:37:10] logging.py:157 >> {'loss': 0.7842, 'learning_rate': 1.9444e-04, 'epoch': 0.32} + +[INFO|2024-12-28 20:37:10] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-200 + +[INFO|2024-12-28 20:37:11] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:37:11] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:37:11] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-200/tokenizer_config.json + +[INFO|2024-12-28 20:37:11] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-200/special_tokens_map.json + +[INFO|2024-12-28 20:37:18] logging.py:157 >> {'loss': 0.8082, 'learning_rate': 1.9416e-04, 'epoch': 0.33} + +[INFO|2024-12-28 20:37:23] logging.py:157 >> {'loss': 0.7883, 'learning_rate': 1.9387e-04, 'epoch': 0.34} + +[INFO|2024-12-28 20:37:31] logging.py:157 >> {'loss': 0.7356, 'learning_rate': 1.9358e-04, 'epoch': 0.34} + +[INFO|2024-12-28 20:37:37] logging.py:157 >> {'loss': 0.7891, 'learning_rate': 1.9328e-04, 'epoch': 0.35} + +[INFO|2024-12-28 20:37:44] logging.py:157 >> {'loss': 0.7671, 'learning_rate': 1.9298e-04, 'epoch': 0.36} + +[INFO|2024-12-28 20:37:51] logging.py:157 >> {'loss': 0.6608, 'learning_rate': 1.9267e-04, 'epoch': 0.37} + +[INFO|2024-12-28 20:38:00] logging.py:157 >> {'loss': 0.6470, 'learning_rate': 1.9235e-04, 'epoch': 0.38} + +[INFO|2024-12-28 20:38:08] logging.py:157 >> {'loss': 0.7290, 'learning_rate': 1.9202e-04, 'epoch': 0.38} + +[INFO|2024-12-28 20:38:13] logging.py:157 >> {'loss': 0.6713, 'learning_rate': 1.9169e-04, 'epoch': 0.39} + +[INFO|2024-12-28 20:38:22] logging.py:157 >> {'loss': 0.7049, 'learning_rate': 1.9135e-04, 'epoch': 0.40} + +[INFO|2024-12-28 20:38:30] logging.py:157 >> {'loss': 0.7419, 'learning_rate': 1.9101e-04, 'epoch': 0.41} + +[INFO|2024-12-28 20:38:35] logging.py:157 >> {'loss': 0.7148, 'learning_rate': 1.9066e-04, 'epoch': 0.42} + +[INFO|2024-12-28 20:38:43] logging.py:157 >> {'loss': 0.7493, 'learning_rate': 1.9030e-04, 'epoch': 0.42} + +[INFO|2024-12-28 20:38:49] logging.py:157 >> {'loss': 0.7652, 'learning_rate': 1.8994e-04, 'epoch': 0.43} + +[INFO|2024-12-28 20:38:56] logging.py:157 >> {'loss': 0.7438, 'learning_rate': 1.8957e-04, 'epoch': 0.44} + +[INFO|2024-12-28 20:39:01] logging.py:157 >> {'loss': 0.7683, 'learning_rate': 1.8920e-04, 'epoch': 0.45} + +[INFO|2024-12-28 20:39:06] logging.py:157 >> {'loss': 0.8115, 'learning_rate': 1.8881e-04, 'epoch': 0.46} + +[INFO|2024-12-28 20:39:12] logging.py:157 >> {'loss': 0.8335, 'learning_rate': 1.8843e-04, 'epoch': 0.46} + +[INFO|2024-12-28 20:39:19] logging.py:157 >> {'loss': 0.6933, 'learning_rate': 1.8803e-04, 'epoch': 0.47} + +[INFO|2024-12-28 20:39:23] logging.py:157 >> {'loss': 0.7515, 'learning_rate': 1.8763e-04, 'epoch': 0.48} + +[INFO|2024-12-28 20:39:23] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-300 + +[INFO|2024-12-28 20:39:23] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:39:23] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:39:24] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-300/tokenizer_config.json + +[INFO|2024-12-28 20:39:24] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-300/special_tokens_map.json + +[INFO|2024-12-28 20:39:31] logging.py:157 >> {'loss': 0.6931, 'learning_rate': 1.8722e-04, 'epoch': 0.49} + +[INFO|2024-12-28 20:39:37] logging.py:157 >> {'loss': 0.7820, 'learning_rate': 1.8681e-04, 'epoch': 0.50} + +[INFO|2024-12-28 20:39:44] logging.py:157 >> {'loss': 0.7361, 'learning_rate': 1.8639e-04, 'epoch': 0.50} + +[INFO|2024-12-28 20:39:49] logging.py:157 >> {'loss': 0.7443, 'learning_rate': 1.8597e-04, 'epoch': 0.51} + +[INFO|2024-12-28 20:39:57] logging.py:157 >> {'loss': 0.7221, 'learning_rate': 1.8554e-04, 'epoch': 0.52} + +[INFO|2024-12-28 20:40:02] logging.py:157 >> {'loss': 0.7622, 'learning_rate': 1.8510e-04, 'epoch': 0.53} + +[INFO|2024-12-28 20:40:08] logging.py:157 >> {'loss': 0.8556, 'learning_rate': 1.8466e-04, 'epoch': 0.54} + +[INFO|2024-12-28 20:40:15] logging.py:157 >> {'loss': 0.7814, 'learning_rate': 1.8421e-04, 'epoch': 0.54} + +[INFO|2024-12-28 20:40:24] logging.py:157 >> {'loss': 0.7220, 'learning_rate': 1.8375e-04, 'epoch': 0.55} + +[INFO|2024-12-28 20:40:28] logging.py:157 >> {'loss': 0.7903, 'learning_rate': 1.8329e-04, 'epoch': 0.56} + +[INFO|2024-12-28 20:40:33] logging.py:157 >> {'loss': 0.6996, 'learning_rate': 1.8283e-04, 'epoch': 0.57} + +[INFO|2024-12-28 20:40:39] logging.py:157 >> {'loss': 0.7730, 'learning_rate': 1.8235e-04, 'epoch': 0.58} + +[INFO|2024-12-28 20:40:45] logging.py:157 >> {'loss': 0.7280, 'learning_rate': 1.8188e-04, 'epoch': 0.58} + +[INFO|2024-12-28 20:40:51] logging.py:157 >> {'loss': 0.7659, 'learning_rate': 1.8139e-04, 'epoch': 0.59} + +[INFO|2024-12-28 20:40:56] logging.py:157 >> {'loss': 0.7039, 'learning_rate': 1.8090e-04, 'epoch': 0.60} + +[INFO|2024-12-28 20:41:02] logging.py:157 >> {'loss': 0.7125, 'learning_rate': 1.8041e-04, 'epoch': 0.61} + +[INFO|2024-12-28 20:41:09] logging.py:157 >> {'loss': 0.6980, 'learning_rate': 1.7991e-04, 'epoch': 0.62} + +[INFO|2024-12-28 20:41:14] logging.py:157 >> {'loss': 0.8255, 'learning_rate': 1.7940e-04, 'epoch': 0.62} + +[INFO|2024-12-28 20:41:19] logging.py:157 >> {'loss': 0.6616, 'learning_rate': 1.7889e-04, 'epoch': 0.63} + +[INFO|2024-12-28 20:41:24] logging.py:157 >> {'loss': 0.7452, 'learning_rate': 1.7837e-04, 'epoch': 0.64} + +[INFO|2024-12-28 20:41:24] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-400 + +[INFO|2024-12-28 20:41:24] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:41:24] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:41:25] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-400/tokenizer_config.json + +[INFO|2024-12-28 20:41:25] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-400/special_tokens_map.json + +[INFO|2024-12-28 20:41:32] logging.py:157 >> {'loss': 0.7652, 'learning_rate': 1.7785e-04, 'epoch': 0.65} + +[INFO|2024-12-28 20:41:40] logging.py:157 >> {'loss': 0.7793, 'learning_rate': 1.7732e-04, 'epoch': 0.66} + +[INFO|2024-12-28 20:41:45] logging.py:157 >> {'loss': 0.6875, 'learning_rate': 1.7678e-04, 'epoch': 0.66} + +[INFO|2024-12-28 20:41:54] logging.py:157 >> {'loss': 0.7465, 'learning_rate': 1.7624e-04, 'epoch': 0.67} + +[INFO|2024-12-28 20:42:01] logging.py:157 >> {'loss': 0.7205, 'learning_rate': 1.7570e-04, 'epoch': 0.68} + +[INFO|2024-12-28 20:42:08] logging.py:157 >> {'loss': 0.6589, 'learning_rate': 1.7515e-04, 'epoch': 0.69} + +[INFO|2024-12-28 20:42:13] logging.py:157 >> {'loss': 0.7035, 'learning_rate': 1.7459e-04, 'epoch': 0.70} + +[INFO|2024-12-28 20:42:20] logging.py:157 >> {'loss': 0.7870, 'learning_rate': 1.7403e-04, 'epoch': 0.70} + +[INFO|2024-12-28 20:42:29] logging.py:157 >> {'loss': 0.7515, 'learning_rate': 1.7347e-04, 'epoch': 0.71} + +[INFO|2024-12-28 20:42:34] logging.py:157 >> {'loss': 0.7199, 'learning_rate': 1.7290e-04, 'epoch': 0.72} + +[INFO|2024-12-28 20:42:41] logging.py:157 >> {'loss': 0.8037, 'learning_rate': 1.7232e-04, 'epoch': 0.73} + +[INFO|2024-12-28 20:42:46] logging.py:157 >> {'loss': 0.7502, 'learning_rate': 1.7174e-04, 'epoch': 0.74} + +[INFO|2024-12-28 20:42:53] logging.py:157 >> {'loss': 0.7446, 'learning_rate': 1.7115e-04, 'epoch': 0.74} + +[INFO|2024-12-28 20:43:00] logging.py:157 >> {'loss': 0.6507, 'learning_rate': 1.7056e-04, 'epoch': 0.75} + +[INFO|2024-12-28 20:43:06] logging.py:157 >> {'loss': 0.7164, 'learning_rate': 1.6997e-04, 'epoch': 0.76} + +[INFO|2024-12-28 20:43:12] logging.py:157 >> {'loss': 0.7621, 'learning_rate': 1.6937e-04, 'epoch': 0.77} + +[INFO|2024-12-28 20:43:18] logging.py:157 >> {'loss': 0.7623, 'learning_rate': 1.6876e-04, 'epoch': 0.78} + +[INFO|2024-12-28 20:43:26] logging.py:157 >> {'loss': 0.6606, 'learning_rate': 1.6815e-04, 'epoch': 0.78} + +[INFO|2024-12-28 20:43:34] logging.py:157 >> {'loss': 0.6941, 'learning_rate': 1.6753e-04, 'epoch': 0.79} + +[INFO|2024-12-28 20:43:39] logging.py:157 >> {'loss': 0.6841, 'learning_rate': 1.6691e-04, 'epoch': 0.80} + +[INFO|2024-12-28 20:43:39] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-500 + +[INFO|2024-12-28 20:43:39] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:43:39] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:43:40] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-500/tokenizer_config.json + +[INFO|2024-12-28 20:43:40] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-500/special_tokens_map.json + +[INFO|2024-12-28 20:43:47] logging.py:157 >> {'loss': 0.6996, 'learning_rate': 1.6629e-04, 'epoch': 0.81} + +[INFO|2024-12-28 20:43:54] logging.py:157 >> {'loss': 0.7542, 'learning_rate': 1.6566e-04, 'epoch': 0.82} + +[INFO|2024-12-28 20:44:00] logging.py:157 >> {'loss': 0.7175, 'learning_rate': 1.6502e-04, 'epoch': 0.82} + +[INFO|2024-12-28 20:44:05] logging.py:157 >> {'loss': 0.7565, 'learning_rate': 1.6439e-04, 'epoch': 0.83} + +[INFO|2024-12-28 20:44:10] logging.py:157 >> {'loss': 0.7339, 'learning_rate': 1.6374e-04, 'epoch': 0.84} + +[INFO|2024-12-28 20:44:16] logging.py:157 >> {'loss': 0.5690, 'learning_rate': 1.6309e-04, 'epoch': 0.85} + +[INFO|2024-12-28 20:44:20] logging.py:157 >> {'loss': 0.7556, 'learning_rate': 1.6244e-04, 'epoch': 0.86} + +[INFO|2024-12-28 20:44:25] logging.py:157 >> {'loss': 0.7084, 'learning_rate': 1.6179e-04, 'epoch': 0.86} + +[INFO|2024-12-28 20:44:31] logging.py:157 >> {'loss': 0.6935, 'learning_rate': 1.6113e-04, 'epoch': 0.87} + +[INFO|2024-12-28 20:44:36] logging.py:157 >> {'loss': 0.7076, 'learning_rate': 1.6046e-04, 'epoch': 0.88} + +[INFO|2024-12-28 20:44:42] logging.py:157 >> {'loss': 0.7151, 'learning_rate': 1.5979e-04, 'epoch': 0.89} + +[INFO|2024-12-28 20:44:48] logging.py:157 >> {'loss': 0.7001, 'learning_rate': 1.5912e-04, 'epoch': 0.90} + +[INFO|2024-12-28 20:44:53] logging.py:157 >> {'loss': 0.7285, 'learning_rate': 1.5844e-04, 'epoch': 0.90} + +[INFO|2024-12-28 20:44:59] logging.py:157 >> {'loss': 0.8041, 'learning_rate': 1.5776e-04, 'epoch': 0.91} + +[INFO|2024-12-28 20:45:05] logging.py:157 >> {'loss': 0.7353, 'learning_rate': 1.5707e-04, 'epoch': 0.92} + +[INFO|2024-12-28 20:45:11] logging.py:157 >> {'loss': 0.7792, 'learning_rate': 1.5638e-04, 'epoch': 0.93} + +[INFO|2024-12-28 20:45:18] logging.py:157 >> {'loss': 1.0121, 'learning_rate': 1.5569e-04, 'epoch': 0.94} + +[INFO|2024-12-28 20:45:24] logging.py:157 >> {'loss': 0.7727, 'learning_rate': 1.5499e-04, 'epoch': 0.94} + +[INFO|2024-12-28 20:45:30] logging.py:157 >> {'loss': 0.7410, 'learning_rate': 1.5429e-04, 'epoch': 0.95} + +[INFO|2024-12-28 20:45:36] logging.py:157 >> {'loss': 0.6919, 'learning_rate': 1.5358e-04, 'epoch': 0.96} + +[INFO|2024-12-28 20:45:36] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-600 + +[INFO|2024-12-28 20:45:36] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:45:36] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:45:36] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-600/tokenizer_config.json + +[INFO|2024-12-28 20:45:36] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-600/special_tokens_map.json + +[INFO|2024-12-28 20:45:44] logging.py:157 >> {'loss': 0.7163, 'learning_rate': 1.5287e-04, 'epoch': 0.97} + +[INFO|2024-12-28 20:45:48] logging.py:157 >> {'loss': 0.8152, 'learning_rate': 1.5216e-04, 'epoch': 0.98} + +[INFO|2024-12-28 20:45:54] logging.py:157 >> {'loss': 0.6709, 'learning_rate': 1.5144e-04, 'epoch': 0.98} + +[INFO|2024-12-28 20:46:00] logging.py:157 >> {'loss': 0.6527, 'learning_rate': 1.5072e-04, 'epoch': 0.99} + +[INFO|2024-12-28 20:46:05] logging.py:157 >> {'loss': 0.8194, 'learning_rate': 1.5000e-04, 'epoch': 1.00} + +[INFO|2024-12-28 20:46:10] logging.py:157 >> {'loss': 0.6627, 'learning_rate': 1.4927e-04, 'epoch': 1.01} + +[INFO|2024-12-28 20:46:16] logging.py:157 >> {'loss': 0.6366, 'learning_rate': 1.4854e-04, 'epoch': 1.02} + +[INFO|2024-12-28 20:46:21] logging.py:157 >> {'loss': 0.6717, 'learning_rate': 1.4781e-04, 'epoch': 1.02} + +[INFO|2024-12-28 20:46:26] logging.py:157 >> {'loss': 0.6483, 'learning_rate': 1.4707e-04, 'epoch': 1.03} + +[INFO|2024-12-28 20:46:31] logging.py:157 >> {'loss': 0.6151, 'learning_rate': 1.4633e-04, 'epoch': 1.04} + +[INFO|2024-12-28 20:46:38] logging.py:157 >> {'loss': 0.6707, 'learning_rate': 1.4559e-04, 'epoch': 1.05} + +[INFO|2024-12-28 20:46:42] logging.py:157 >> {'loss': 0.6125, 'learning_rate': 1.4484e-04, 'epoch': 1.06} + +[INFO|2024-12-28 20:46:48] logging.py:157 >> {'loss': 0.6206, 'learning_rate': 1.4409e-04, 'epoch': 1.06} + +[INFO|2024-12-28 20:46:54] logging.py:157 >> {'loss': 0.6161, 'learning_rate': 1.4333e-04, 'epoch': 1.07} + +[INFO|2024-12-28 20:47:00] logging.py:157 >> {'loss': 0.6583, 'learning_rate': 1.4258e-04, 'epoch': 1.08} + +[INFO|2024-12-28 20:47:05] logging.py:157 >> {'loss': 0.6222, 'learning_rate': 1.4182e-04, 'epoch': 1.09} + +[INFO|2024-12-28 20:47:10] logging.py:157 >> {'loss': 0.7160, 'learning_rate': 1.4106e-04, 'epoch': 1.10} + +[INFO|2024-12-28 20:47:16] logging.py:157 >> {'loss': 0.6198, 'learning_rate': 1.4029e-04, 'epoch': 1.10} + +[INFO|2024-12-28 20:47:24] logging.py:157 >> {'loss': 0.6389, 'learning_rate': 1.3952e-04, 'epoch': 1.11} + +[INFO|2024-12-28 20:47:30] logging.py:157 >> {'loss': 0.6842, 'learning_rate': 1.3875e-04, 'epoch': 1.12} + +[INFO|2024-12-28 20:47:30] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-700 + +[INFO|2024-12-28 20:47:30] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:47:30] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:47:30] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-700/tokenizer_config.json + +[INFO|2024-12-28 20:47:30] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-700/special_tokens_map.json + +[INFO|2024-12-28 20:47:38] logging.py:157 >> {'loss': 0.6071, 'learning_rate': 1.3798e-04, 'epoch': 1.13} + +[INFO|2024-12-28 20:47:46] logging.py:157 >> {'loss': 0.5915, 'learning_rate': 1.3720e-04, 'epoch': 1.14} + +[INFO|2024-12-28 20:47:51] logging.py:157 >> {'loss': 0.6794, 'learning_rate': 1.3642e-04, 'epoch': 1.14} + +[INFO|2024-12-28 20:47:57] logging.py:157 >> {'loss': 0.6773, 'learning_rate': 1.3564e-04, 'epoch': 1.15} + +[INFO|2024-12-28 20:48:04] logging.py:157 >> {'loss': 0.6680, 'learning_rate': 1.3486e-04, 'epoch': 1.16} + +[INFO|2024-12-28 20:48:12] logging.py:157 >> {'loss': 0.6997, 'learning_rate': 1.3407e-04, 'epoch': 1.17} + +[INFO|2024-12-28 20:48:20] logging.py:157 >> {'loss': 0.8310, 'learning_rate': 1.3328e-04, 'epoch': 1.18} + +[INFO|2024-12-28 20:48:27] logging.py:157 >> {'loss': 0.6378, 'learning_rate': 1.3249e-04, 'epoch': 1.18} + +[INFO|2024-12-28 20:48:32] logging.py:157 >> {'loss': 0.6547, 'learning_rate': 1.3170e-04, 'epoch': 1.19} + +[INFO|2024-12-28 20:48:37] logging.py:157 >> {'loss': 0.5808, 'learning_rate': 1.3090e-04, 'epoch': 1.20} + +[INFO|2024-12-28 20:48:43] logging.py:157 >> {'loss': 0.5582, 'learning_rate': 1.3010e-04, 'epoch': 1.21} + +[INFO|2024-12-28 20:48:50] logging.py:157 >> {'loss': 0.5801, 'learning_rate': 1.2930e-04, 'epoch': 1.22} + +[INFO|2024-12-28 20:48:56] logging.py:157 >> {'loss': 0.6500, 'learning_rate': 1.2850e-04, 'epoch': 1.22} + +[INFO|2024-12-28 20:49:03] logging.py:157 >> {'loss': 0.6627, 'learning_rate': 1.2770e-04, 'epoch': 1.23} + +[INFO|2024-12-28 20:49:08] logging.py:157 >> {'loss': 0.5603, 'learning_rate': 1.2689e-04, 'epoch': 1.24} + +[INFO|2024-12-28 20:49:13] logging.py:157 >> {'loss': 0.6525, 'learning_rate': 1.2608e-04, 'epoch': 1.25} + +[INFO|2024-12-28 20:49:20] logging.py:157 >> {'loss': 0.6731, 'learning_rate': 1.2527e-04, 'epoch': 1.26} + +[INFO|2024-12-28 20:49:27] logging.py:157 >> {'loss': 0.6255, 'learning_rate': 1.2446e-04, 'epoch': 1.26} + +[INFO|2024-12-28 20:49:33] logging.py:157 >> {'loss': 0.6585, 'learning_rate': 1.2365e-04, 'epoch': 1.27} + +[INFO|2024-12-28 20:49:41] logging.py:157 >> {'loss': 0.5996, 'learning_rate': 1.2284e-04, 'epoch': 1.28} + +[INFO|2024-12-28 20:49:41] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-800 + +[INFO|2024-12-28 20:49:41] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:49:41] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:49:41] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-800/tokenizer_config.json + +[INFO|2024-12-28 20:49:41] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-800/special_tokens_map.json + +[INFO|2024-12-28 20:49:50] logging.py:157 >> {'loss': 0.6355, 'learning_rate': 1.2202e-04, 'epoch': 1.29} + +[INFO|2024-12-28 20:49:56] logging.py:157 >> {'loss': 0.6615, 'learning_rate': 1.2120e-04, 'epoch': 1.30} + +[INFO|2024-12-28 20:50:04] logging.py:157 >> {'loss': 0.6096, 'learning_rate': 1.2038e-04, 'epoch': 1.30} + +[INFO|2024-12-28 20:50:11] logging.py:157 >> {'loss': 0.5984, 'learning_rate': 1.1956e-04, 'epoch': 1.31} + +[INFO|2024-12-28 20:50:19] logging.py:157 >> {'loss': 0.5569, 'learning_rate': 1.1874e-04, 'epoch': 1.32} + +[INFO|2024-12-28 20:50:26] logging.py:157 >> {'loss': 0.7088, 'learning_rate': 1.1791e-04, 'epoch': 1.33} + +[INFO|2024-12-28 20:50:32] logging.py:157 >> {'loss': 0.6731, 'learning_rate': 1.1709e-04, 'epoch': 1.34} + +[INFO|2024-12-28 20:50:38] logging.py:157 >> {'loss': 0.6188, 'learning_rate': 1.1626e-04, 'epoch': 1.34} + +[INFO|2024-12-28 20:50:43] logging.py:157 >> {'loss': 0.7004, 'learning_rate': 1.1544e-04, 'epoch': 1.35} + +[INFO|2024-12-28 20:50:51] logging.py:157 >> {'loss': 0.5884, 'learning_rate': 1.1461e-04, 'epoch': 1.36} + +[INFO|2024-12-28 20:51:00] logging.py:157 >> {'loss': 0.5739, 'learning_rate': 1.1378e-04, 'epoch': 1.37} + +[INFO|2024-12-28 20:51:05] logging.py:157 >> {'loss': 0.6435, 'learning_rate': 1.1295e-04, 'epoch': 1.38} + +[INFO|2024-12-28 20:51:12] logging.py:157 >> {'loss': 0.6897, 'learning_rate': 1.1212e-04, 'epoch': 1.38} + +[INFO|2024-12-28 20:51:16] logging.py:157 >> {'loss': 0.6641, 'learning_rate': 1.1129e-04, 'epoch': 1.39} + +[INFO|2024-12-28 20:51:22] logging.py:157 >> {'loss': 0.6273, 'learning_rate': 1.1045e-04, 'epoch': 1.40} + +[INFO|2024-12-28 20:51:30] logging.py:157 >> {'loss': 0.6437, 'learning_rate': 1.0962e-04, 'epoch': 1.41} + +[INFO|2024-12-28 20:51:37] logging.py:157 >> {'loss': 0.6345, 'learning_rate': 1.0879e-04, 'epoch': 1.42} + +[INFO|2024-12-28 20:51:45] logging.py:157 >> {'loss': 0.5913, 'learning_rate': 1.0795e-04, 'epoch': 1.42} + +[INFO|2024-12-28 20:51:51] logging.py:157 >> {'loss': 0.6482, 'learning_rate': 1.0711e-04, 'epoch': 1.43} + +[INFO|2024-12-28 20:51:58] logging.py:157 >> {'loss': 0.6165, 'learning_rate': 1.0628e-04, 'epoch': 1.44} + +[INFO|2024-12-28 20:51:58] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-900 + +[INFO|2024-12-28 20:51:58] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:51:58] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:51:58] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-900/tokenizer_config.json + +[INFO|2024-12-28 20:51:58] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-900/special_tokens_map.json + +[INFO|2024-12-28 20:52:05] logging.py:157 >> {'loss': 0.6340, 'learning_rate': 1.0544e-04, 'epoch': 1.45} + +[INFO|2024-12-28 20:52:09] logging.py:157 >> {'loss': 0.6509, 'learning_rate': 1.0461e-04, 'epoch': 1.46} + +[INFO|2024-12-28 20:52:17] logging.py:157 >> {'loss': 0.6212, 'learning_rate': 1.0377e-04, 'epoch': 1.46} + +[INFO|2024-12-28 20:52:23] logging.py:157 >> {'loss': 0.7305, 'learning_rate': 1.0293e-04, 'epoch': 1.47} + +[INFO|2024-12-28 20:52:29] logging.py:157 >> {'loss': 0.6685, 'learning_rate': 1.0209e-04, 'epoch': 1.48} + +[INFO|2024-12-28 20:52:35] logging.py:157 >> {'loss': 0.6214, 'learning_rate': 1.0126e-04, 'epoch': 1.49} + +[INFO|2024-12-28 20:52:43] logging.py:157 >> {'loss': 0.6035, 'learning_rate': 1.0042e-04, 'epoch': 1.50} + +[INFO|2024-12-28 20:52:49] logging.py:157 >> {'loss': 0.5868, 'learning_rate': 9.9581e-05, 'epoch': 1.50} + +[INFO|2024-12-28 20:52:55] logging.py:157 >> {'loss': 0.6003, 'learning_rate': 9.8743e-05, 'epoch': 1.51} + +[INFO|2024-12-28 20:53:01] logging.py:157 >> {'loss': 0.5854, 'learning_rate': 9.7906e-05, 'epoch': 1.52} + +[INFO|2024-12-28 20:53:09] logging.py:157 >> {'loss': 0.5882, 'learning_rate': 9.7068e-05, 'epoch': 1.53} + +[INFO|2024-12-28 20:53:14] logging.py:157 >> {'loss': 0.7187, 'learning_rate': 9.6231e-05, 'epoch': 1.54} + +[INFO|2024-12-28 20:53:19] logging.py:157 >> {'loss': 0.6156, 'learning_rate': 9.5394e-05, 'epoch': 1.54} + +[INFO|2024-12-28 20:53:23] logging.py:157 >> {'loss': 0.6488, 'learning_rate': 9.4557e-05, 'epoch': 1.55} + +[INFO|2024-12-28 20:53:30] logging.py:157 >> {'loss': 0.6601, 'learning_rate': 9.3721e-05, 'epoch': 1.56} + +[INFO|2024-12-28 20:53:37] logging.py:157 >> {'loss': 0.5968, 'learning_rate': 9.2885e-05, 'epoch': 1.57} + +[INFO|2024-12-28 20:53:42] logging.py:157 >> {'loss': 0.7034, 'learning_rate': 9.2050e-05, 'epoch': 1.58} + +[INFO|2024-12-28 20:53:46] logging.py:157 >> {'loss': 0.5973, 'learning_rate': 9.1215e-05, 'epoch': 1.58} + +[INFO|2024-12-28 20:53:50] logging.py:157 >> {'loss': 0.7877, 'learning_rate': 9.0381e-05, 'epoch': 1.59} + +[INFO|2024-12-28 20:53:57] logging.py:157 >> {'loss': 0.6440, 'learning_rate': 8.9547e-05, 'epoch': 1.60} + +[INFO|2024-12-28 20:53:57] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1000 + +[INFO|2024-12-28 20:53:57] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:53:57] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:53:57] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1000/tokenizer_config.json + +[INFO|2024-12-28 20:53:57] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1000/special_tokens_map.json + +[INFO|2024-12-28 20:54:04] logging.py:157 >> {'loss': 0.6678, 'learning_rate': 8.8714e-05, 'epoch': 1.61} + +[INFO|2024-12-28 20:54:09] logging.py:157 >> {'loss': 0.6088, 'learning_rate': 8.7882e-05, 'epoch': 1.62} + +[INFO|2024-12-28 20:54:16] logging.py:157 >> {'loss': 0.6219, 'learning_rate': 8.7051e-05, 'epoch': 1.62} + +[INFO|2024-12-28 20:54:21] logging.py:157 >> {'loss': 0.6698, 'learning_rate': 8.6221e-05, 'epoch': 1.63} + +[INFO|2024-12-28 20:54:29] logging.py:157 >> {'loss': 0.6207, 'learning_rate': 8.5392e-05, 'epoch': 1.64} + +[INFO|2024-12-28 20:54:36] logging.py:157 >> {'loss': 0.6260, 'learning_rate': 8.4563e-05, 'epoch': 1.65} + +[INFO|2024-12-28 20:54:42] logging.py:157 >> {'loss': 0.6972, 'learning_rate': 8.3736e-05, 'epoch': 1.66} + +[INFO|2024-12-28 20:54:47] logging.py:157 >> {'loss': 0.6282, 'learning_rate': 8.2910e-05, 'epoch': 1.66} + +[INFO|2024-12-28 20:54:53] logging.py:157 >> {'loss': 0.6219, 'learning_rate': 8.2085e-05, 'epoch': 1.67} + +[INFO|2024-12-28 20:55:00] logging.py:157 >> {'loss': 0.6220, 'learning_rate': 8.1262e-05, 'epoch': 1.68} + +[INFO|2024-12-28 20:55:06] logging.py:157 >> {'loss': 0.5801, 'learning_rate': 8.0440e-05, 'epoch': 1.69} + +[INFO|2024-12-28 20:55:13] logging.py:157 >> {'loss': 0.5980, 'learning_rate': 7.9619e-05, 'epoch': 1.70} + +[INFO|2024-12-28 20:55:18] logging.py:157 >> {'loss': 0.6990, 'learning_rate': 7.8799e-05, 'epoch': 1.70} + +[INFO|2024-12-28 20:55:23] logging.py:157 >> {'loss': 0.5882, 'learning_rate': 7.7981e-05, 'epoch': 1.71} + +[INFO|2024-12-28 20:55:31] logging.py:157 >> {'loss': 0.5321, 'learning_rate': 7.7165e-05, 'epoch': 1.72} + +[INFO|2024-12-28 20:55:39] logging.py:157 >> {'loss': 0.6647, 'learning_rate': 7.6350e-05, 'epoch': 1.73} + +[INFO|2024-12-28 20:55:45] logging.py:157 >> {'loss': 0.6280, 'learning_rate': 7.5537e-05, 'epoch': 1.74} + +[INFO|2024-12-28 20:55:52] logging.py:157 >> {'loss': 0.6262, 'learning_rate': 7.4726e-05, 'epoch': 1.74} + +[INFO|2024-12-28 20:55:58] logging.py:157 >> {'loss': 0.6131, 'learning_rate': 7.3916e-05, 'epoch': 1.75} + +[INFO|2024-12-28 20:56:04] logging.py:157 >> {'loss': 0.6494, 'learning_rate': 7.3108e-05, 'epoch': 1.76} + +[INFO|2024-12-28 20:56:04] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1100 + +[INFO|2024-12-28 20:56:05] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:56:05] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:56:05] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1100/tokenizer_config.json + +[INFO|2024-12-28 20:56:05] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1100/special_tokens_map.json + +[INFO|2024-12-28 20:56:12] logging.py:157 >> {'loss': 0.5514, 'learning_rate': 7.2302e-05, 'epoch': 1.77} + +[INFO|2024-12-28 20:56:20] logging.py:157 >> {'loss': 0.5823, 'learning_rate': 7.1498e-05, 'epoch': 1.78} + +[INFO|2024-12-28 20:56:25] logging.py:157 >> {'loss': 0.7207, 'learning_rate': 7.0696e-05, 'epoch': 1.78} + +[INFO|2024-12-28 20:56:32] logging.py:157 >> {'loss': 0.7006, 'learning_rate': 6.9896e-05, 'epoch': 1.79} + +[INFO|2024-12-28 20:56:37] logging.py:157 >> {'loss': 0.6222, 'learning_rate': 6.9098e-05, 'epoch': 1.80} + +[INFO|2024-12-28 20:56:43] logging.py:157 >> {'loss': 0.6569, 'learning_rate': 6.8303e-05, 'epoch': 1.81} + +[INFO|2024-12-28 20:56:51] logging.py:157 >> {'loss': 0.5430, 'learning_rate': 6.7509e-05, 'epoch': 1.82} + +[INFO|2024-12-28 20:56:57] logging.py:157 >> {'loss': 0.6334, 'learning_rate': 6.6718e-05, 'epoch': 1.82} + +[INFO|2024-12-28 20:57:05] logging.py:157 >> {'loss': 0.6701, 'learning_rate': 6.5929e-05, 'epoch': 1.83} + +[INFO|2024-12-28 20:57:12] logging.py:157 >> {'loss': 0.6216, 'learning_rate': 6.5143e-05, 'epoch': 1.84} + +[INFO|2024-12-28 20:57:20] logging.py:157 >> {'loss': 0.5877, 'learning_rate': 6.4359e-05, 'epoch': 1.85} + +[INFO|2024-12-28 20:57:27] logging.py:157 >> {'loss': 0.6256, 'learning_rate': 6.3577e-05, 'epoch': 1.86} + +[INFO|2024-12-28 20:57:31] logging.py:157 >> {'loss': 0.7062, 'learning_rate': 6.2798e-05, 'epoch': 1.86} + +[INFO|2024-12-28 20:57:38] logging.py:157 >> {'loss': 0.6304, 'learning_rate': 6.2022e-05, 'epoch': 1.87} + +[INFO|2024-12-28 20:57:42] logging.py:157 >> {'loss': 0.7695, 'learning_rate': 6.1248e-05, 'epoch': 1.88} + +[INFO|2024-12-28 20:57:49] logging.py:157 >> {'loss': 0.5723, 'learning_rate': 6.0478e-05, 'epoch': 1.89} + +[INFO|2024-12-28 20:57:54] logging.py:157 >> {'loss': 0.6847, 'learning_rate': 5.9709e-05, 'epoch': 1.90} + +[INFO|2024-12-28 20:58:01] logging.py:157 >> {'loss': 0.6618, 'learning_rate': 5.8944e-05, 'epoch': 1.90} + +[INFO|2024-12-28 20:58:07] logging.py:157 >> {'loss': 0.6275, 'learning_rate': 5.8182e-05, 'epoch': 1.91} + +[INFO|2024-12-28 20:58:14] logging.py:157 >> {'loss': 0.5617, 'learning_rate': 5.7422e-05, 'epoch': 1.92} + +[INFO|2024-12-28 20:58:14] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1200 + +[INFO|2024-12-28 20:58:15] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 20:58:15] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 20:58:15] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1200/tokenizer_config.json + +[INFO|2024-12-28 20:58:15] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1200/special_tokens_map.json + +[INFO|2024-12-28 20:58:21] logging.py:157 >> {'loss': 0.6278, 'learning_rate': 5.6666e-05, 'epoch': 1.93} + +[INFO|2024-12-28 20:58:26] logging.py:157 >> {'loss': 0.6713, 'learning_rate': 5.5912e-05, 'epoch': 1.94} + +[INFO|2024-12-28 20:58:30] logging.py:157 >> {'loss': 0.6113, 'learning_rate': 5.5162e-05, 'epoch': 1.94} + +[INFO|2024-12-28 20:58:37] logging.py:157 >> {'loss': 0.5587, 'learning_rate': 5.4414e-05, 'epoch': 1.95} + +[INFO|2024-12-28 20:58:44] logging.py:157 >> {'loss': 0.5601, 'learning_rate': 5.3670e-05, 'epoch': 1.96} + +[INFO|2024-12-28 20:58:49] logging.py:157 >> {'loss': 0.5941, 'learning_rate': 5.2930e-05, 'epoch': 1.97} + +[INFO|2024-12-28 20:58:55] logging.py:157 >> {'loss': 0.6285, 'learning_rate': 5.2192e-05, 'epoch': 1.98} + +[INFO|2024-12-28 20:59:01] logging.py:157 >> {'loss': 0.6516, 'learning_rate': 5.1458e-05, 'epoch': 1.98} + +[INFO|2024-12-28 20:59:10] logging.py:157 >> {'loss': 0.5904, 'learning_rate': 5.0727e-05, 'epoch': 1.99} + +[INFO|2024-12-28 20:59:17] logging.py:157 >> {'loss': 0.6190, 'learning_rate': 5.0000e-05, 'epoch': 2.00} + +[INFO|2024-12-28 20:59:21] logging.py:157 >> {'loss': 0.6058, 'learning_rate': 4.9276e-05, 'epoch': 2.01} + +[INFO|2024-12-28 20:59:26] logging.py:157 >> {'loss': 0.6248, 'learning_rate': 4.8556e-05, 'epoch': 2.02} + +[INFO|2024-12-28 20:59:32] logging.py:157 >> {'loss': 0.5247, 'learning_rate': 4.7839e-05, 'epoch': 2.02} + +[INFO|2024-12-28 20:59:40] logging.py:157 >> {'loss': 0.5439, 'learning_rate': 4.7127e-05, 'epoch': 2.03} + +[INFO|2024-12-28 20:59:46] logging.py:157 >> {'loss': 0.4491, 'learning_rate': 4.6417e-05, 'epoch': 2.04} + +[INFO|2024-12-28 20:59:54] logging.py:157 >> {'loss': 0.5200, 'learning_rate': 4.5712e-05, 'epoch': 2.05} + +[INFO|2024-12-28 21:00:01] logging.py:157 >> {'loss': 0.5259, 'learning_rate': 4.5010e-05, 'epoch': 2.06} + +[INFO|2024-12-28 21:00:08] logging.py:157 >> {'loss': 0.5025, 'learning_rate': 4.4312e-05, 'epoch': 2.06} + +[INFO|2024-12-28 21:00:14] logging.py:157 >> {'loss': 0.4772, 'learning_rate': 4.3619e-05, 'epoch': 2.07} + +[INFO|2024-12-28 21:00:19] logging.py:157 >> {'loss': 0.5945, 'learning_rate': 4.2929e-05, 'epoch': 2.08} + +[INFO|2024-12-28 21:00:19] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1300 + +[INFO|2024-12-28 21:00:20] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:00:20] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:00:20] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1300/tokenizer_config.json + +[INFO|2024-12-28 21:00:20] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1300/special_tokens_map.json + +[INFO|2024-12-28 21:00:28] logging.py:157 >> {'loss': 0.4813, 'learning_rate': 4.2243e-05, 'epoch': 2.09} + +[INFO|2024-12-28 21:00:34] logging.py:157 >> {'loss': 0.5315, 'learning_rate': 4.1561e-05, 'epoch': 2.10} + +[INFO|2024-12-28 21:00:39] logging.py:157 >> {'loss': 0.5591, 'learning_rate': 4.0883e-05, 'epoch': 2.10} + +[INFO|2024-12-28 21:00:45] logging.py:157 >> {'loss': 0.6050, 'learning_rate': 4.0210e-05, 'epoch': 2.11} + +[INFO|2024-12-28 21:00:53] logging.py:157 >> {'loss': 0.4955, 'learning_rate': 3.9540e-05, 'epoch': 2.12} + +[INFO|2024-12-28 21:00:58] logging.py:157 >> {'loss': 0.5757, 'learning_rate': 3.8875e-05, 'epoch': 2.13} + +[INFO|2024-12-28 21:01:05] logging.py:157 >> {'loss': 0.5313, 'learning_rate': 3.8214e-05, 'epoch': 2.14} + +[INFO|2024-12-28 21:01:11] logging.py:157 >> {'loss': 0.5904, 'learning_rate': 3.7557e-05, 'epoch': 2.14} + +[INFO|2024-12-28 21:01:18] logging.py:157 >> {'loss': 0.4679, 'learning_rate': 3.6905e-05, 'epoch': 2.15} + +[INFO|2024-12-28 21:01:22] logging.py:157 >> {'loss': 0.5235, 'learning_rate': 3.6258e-05, 'epoch': 2.16} + +[INFO|2024-12-28 21:01:27] logging.py:157 >> {'loss': 0.5797, 'learning_rate': 3.5614e-05, 'epoch': 2.17} + +[INFO|2024-12-28 21:01:32] logging.py:157 >> {'loss': 0.5772, 'learning_rate': 3.4976e-05, 'epoch': 2.18} + +[INFO|2024-12-28 21:01:38] logging.py:157 >> {'loss': 0.5316, 'learning_rate': 3.4341e-05, 'epoch': 2.18} + +[INFO|2024-12-28 21:01:44] logging.py:157 >> {'loss': 0.5646, 'learning_rate': 3.3712e-05, 'epoch': 2.19} + +[INFO|2024-12-28 21:01:49] logging.py:157 >> {'loss': 0.5431, 'learning_rate': 3.3087e-05, 'epoch': 2.20} + +[INFO|2024-12-28 21:01:54] logging.py:157 >> {'loss': 0.5403, 'learning_rate': 3.2467e-05, 'epoch': 2.21} + +[INFO|2024-12-28 21:01:59] logging.py:157 >> {'loss': 0.5329, 'learning_rate': 3.1851e-05, 'epoch': 2.22} + +[INFO|2024-12-28 21:02:07] logging.py:157 >> {'loss': 0.5696, 'learning_rate': 3.1241e-05, 'epoch': 2.22} + +[INFO|2024-12-28 21:02:12] logging.py:157 >> {'loss': 0.5900, 'learning_rate': 3.0635e-05, 'epoch': 2.23} + +[INFO|2024-12-28 21:02:20] logging.py:157 >> {'loss': 0.5116, 'learning_rate': 3.0034e-05, 'epoch': 2.24} + +[INFO|2024-12-28 21:02:20] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1400 + +[INFO|2024-12-28 21:02:20] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:02:20] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:02:20] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1400/tokenizer_config.json + +[INFO|2024-12-28 21:02:20] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1400/special_tokens_map.json + +[INFO|2024-12-28 21:02:27] logging.py:157 >> {'loss': 0.5783, 'learning_rate': 2.9438e-05, 'epoch': 2.25} + +[INFO|2024-12-28 21:02:33] logging.py:157 >> {'loss': 0.6259, 'learning_rate': 2.8846e-05, 'epoch': 2.26} + +[INFO|2024-12-28 21:02:41] logging.py:157 >> {'loss': 0.5759, 'learning_rate': 2.8260e-05, 'epoch': 2.26} + +[INFO|2024-12-28 21:02:47] logging.py:157 >> {'loss': 0.4970, 'learning_rate': 2.7679e-05, 'epoch': 2.27} + +[INFO|2024-12-28 21:02:53] logging.py:157 >> {'loss': 0.6190, 'learning_rate': 2.7103e-05, 'epoch': 2.28} + +[INFO|2024-12-28 21:02:57] logging.py:157 >> {'loss': 0.5858, 'learning_rate': 2.6532e-05, 'epoch': 2.29} + +[INFO|2024-12-28 21:03:04] logging.py:157 >> {'loss': 0.6291, 'learning_rate': 2.5966e-05, 'epoch': 2.30} + +[INFO|2024-12-28 21:03:10] logging.py:157 >> {'loss': 0.6418, 'learning_rate': 2.5406e-05, 'epoch': 2.30} + +[INFO|2024-12-28 21:03:17] logging.py:157 >> {'loss': 0.5483, 'learning_rate': 2.4851e-05, 'epoch': 2.31} + +[INFO|2024-12-28 21:03:24] logging.py:157 >> {'loss': 0.6071, 'learning_rate': 2.4300e-05, 'epoch': 2.32} + +[INFO|2024-12-28 21:03:31] logging.py:157 >> {'loss': 0.5099, 'learning_rate': 2.3756e-05, 'epoch': 2.33} + +[INFO|2024-12-28 21:03:35] logging.py:157 >> {'loss': 0.5186, 'learning_rate': 2.3216e-05, 'epoch': 2.34} + +[INFO|2024-12-28 21:03:42] logging.py:157 >> {'loss': 0.5043, 'learning_rate': 2.2682e-05, 'epoch': 2.34} + +[INFO|2024-12-28 21:03:48] logging.py:157 >> {'loss': 0.6100, 'learning_rate': 2.2154e-05, 'epoch': 2.35} + +[INFO|2024-12-28 21:03:55] logging.py:157 >> {'loss': 0.5987, 'learning_rate': 2.1631e-05, 'epoch': 2.36} + +[INFO|2024-12-28 21:04:02] logging.py:157 >> {'loss': 0.5212, 'learning_rate': 2.1113e-05, 'epoch': 2.37} + +[INFO|2024-12-28 21:04:10] logging.py:157 >> {'loss': 0.4796, 'learning_rate': 2.0601e-05, 'epoch': 2.38} + +[INFO|2024-12-28 21:04:17] logging.py:157 >> {'loss': 0.4844, 'learning_rate': 2.0094e-05, 'epoch': 2.38} + +[INFO|2024-12-28 21:04:22] logging.py:157 >> {'loss': 0.5085, 'learning_rate': 1.9594e-05, 'epoch': 2.39} + +[INFO|2024-12-28 21:04:29] logging.py:157 >> {'loss': 0.4839, 'learning_rate': 1.9098e-05, 'epoch': 2.40} + +[INFO|2024-12-28 21:04:29] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1500 + +[INFO|2024-12-28 21:04:29] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:04:29] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:04:29] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1500/tokenizer_config.json + +[INFO|2024-12-28 21:04:29] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1500/special_tokens_map.json + +[INFO|2024-12-28 21:04:38] logging.py:157 >> {'loss': 0.5715, 'learning_rate': 1.8609e-05, 'epoch': 2.41} + +[INFO|2024-12-28 21:04:42] logging.py:157 >> {'loss': 0.5266, 'learning_rate': 1.8125e-05, 'epoch': 2.42} + +[INFO|2024-12-28 21:04:49] logging.py:157 >> {'loss': 0.5422, 'learning_rate': 1.7647e-05, 'epoch': 2.42} + +[INFO|2024-12-28 21:04:55] logging.py:157 >> {'loss': 0.5553, 'learning_rate': 1.7174e-05, 'epoch': 2.43} + +[INFO|2024-12-28 21:05:03] logging.py:157 >> {'loss': 0.5765, 'learning_rate': 1.6708e-05, 'epoch': 2.44} + +[INFO|2024-12-28 21:05:08] logging.py:157 >> {'loss': 0.5490, 'learning_rate': 1.6247e-05, 'epoch': 2.45} + +[INFO|2024-12-28 21:05:13] logging.py:157 >> {'loss': 0.4876, 'learning_rate': 1.5792e-05, 'epoch': 2.46} + +[INFO|2024-12-28 21:05:19] logging.py:157 >> {'loss': 0.5546, 'learning_rate': 1.5344e-05, 'epoch': 2.46} + +[INFO|2024-12-28 21:05:24] logging.py:157 >> {'loss': 0.5356, 'learning_rate': 1.4901e-05, 'epoch': 2.47} + +[INFO|2024-12-28 21:05:30] logging.py:157 >> {'loss': 0.5142, 'learning_rate': 1.4464e-05, 'epoch': 2.48} + +[INFO|2024-12-28 21:05:35] logging.py:157 >> {'loss': 0.6054, 'learning_rate': 1.4033e-05, 'epoch': 2.49} + +[INFO|2024-12-28 21:05:41] logging.py:157 >> {'loss': 0.5294, 'learning_rate': 1.3608e-05, 'epoch': 2.50} + +[INFO|2024-12-28 21:05:45] logging.py:157 >> {'loss': 0.5294, 'learning_rate': 1.3189e-05, 'epoch': 2.50} + +[INFO|2024-12-28 21:05:53] logging.py:157 >> {'loss': 0.4905, 'learning_rate': 1.2776e-05, 'epoch': 2.51} + +[INFO|2024-12-28 21:06:00] logging.py:157 >> {'loss': 0.5186, 'learning_rate': 1.2369e-05, 'epoch': 2.52} + +[INFO|2024-12-28 21:06:06] logging.py:157 >> {'loss': 0.4909, 'learning_rate': 1.1969e-05, 'epoch': 2.53} + +[INFO|2024-12-28 21:06:11] logging.py:157 >> {'loss': 0.5303, 'learning_rate': 1.1574e-05, 'epoch': 2.54} + +[INFO|2024-12-28 21:06:17] logging.py:157 >> {'loss': 0.5169, 'learning_rate': 1.1186e-05, 'epoch': 2.54} + +[INFO|2024-12-28 21:06:24] logging.py:157 >> {'loss': 0.5339, 'learning_rate': 1.0804e-05, 'epoch': 2.55} + +[INFO|2024-12-28 21:06:29] logging.py:157 >> {'loss': 0.5283, 'learning_rate': 1.0429e-05, 'epoch': 2.56} + +[INFO|2024-12-28 21:06:29] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1600 + +[INFO|2024-12-28 21:06:29] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:06:29] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:06:29] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1600/tokenizer_config.json + +[INFO|2024-12-28 21:06:29] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1600/special_tokens_map.json + +[INFO|2024-12-28 21:06:36] logging.py:157 >> {'loss': 0.5576, 'learning_rate': 1.0059e-05, 'epoch': 2.57} + +[INFO|2024-12-28 21:06:42] logging.py:157 >> {'loss': 0.5136, 'learning_rate': 9.6964e-06, 'epoch': 2.58} + +[INFO|2024-12-28 21:06:49] logging.py:157 >> {'loss': 0.5885, 'learning_rate': 9.3397e-06, 'epoch': 2.58} + +[INFO|2024-12-28 21:06:57] logging.py:157 >> {'loss': 0.5000, 'learning_rate': 8.9894e-06, 'epoch': 2.59} + +[INFO|2024-12-28 21:07:04] logging.py:157 >> {'loss': 0.5325, 'learning_rate': 8.6455e-06, 'epoch': 2.60} + +[INFO|2024-12-28 21:07:10] logging.py:157 >> {'loss': 0.5772, 'learning_rate': 8.3079e-06, 'epoch': 2.61} + +[INFO|2024-12-28 21:07:17] logging.py:157 >> {'loss': 0.5736, 'learning_rate': 7.9768e-06, 'epoch': 2.62} + +[INFO|2024-12-28 21:07:23] logging.py:157 >> {'loss': 0.5199, 'learning_rate': 7.6522e-06, 'epoch': 2.62} + +[INFO|2024-12-28 21:07:30] logging.py:157 >> {'loss': 0.5753, 'learning_rate': 7.3340e-06, 'epoch': 2.63} + +[INFO|2024-12-28 21:07:35] logging.py:157 >> {'loss': 0.5424, 'learning_rate': 7.0224e-06, 'epoch': 2.64} + +[INFO|2024-12-28 21:07:42] logging.py:157 >> {'loss': 0.5555, 'learning_rate': 6.7172e-06, 'epoch': 2.65} + +[INFO|2024-12-28 21:07:50] logging.py:157 >> {'loss': 0.4936, 'learning_rate': 6.4186e-06, 'epoch': 2.66} + +[INFO|2024-12-28 21:07:56] logging.py:157 >> {'loss': 0.6291, 'learning_rate': 6.1266e-06, 'epoch': 2.66} + +[INFO|2024-12-28 21:08:01] logging.py:157 >> {'loss': 0.5197, 'learning_rate': 5.8412e-06, 'epoch': 2.67} + +[INFO|2024-12-28 21:08:08] logging.py:157 >> {'loss': 0.5398, 'learning_rate': 5.5624e-06, 'epoch': 2.68} + +[INFO|2024-12-28 21:08:13] logging.py:157 >> {'loss': 0.6059, 'learning_rate': 5.2902e-06, 'epoch': 2.69} + +[INFO|2024-12-28 21:08:20] logging.py:157 >> {'loss': 0.5310, 'learning_rate': 5.0246e-06, 'epoch': 2.70} + +[INFO|2024-12-28 21:08:24] logging.py:157 >> {'loss': 0.6054, 'learning_rate': 4.7657e-06, 'epoch': 2.70} + +[INFO|2024-12-28 21:08:31] logging.py:157 >> {'loss': 0.5173, 'learning_rate': 4.5135e-06, 'epoch': 2.71} + +[INFO|2024-12-28 21:08:36] logging.py:157 >> {'loss': 0.4944, 'learning_rate': 4.2681e-06, 'epoch': 2.72} + +[INFO|2024-12-28 21:08:36] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1700 + +[INFO|2024-12-28 21:08:37] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:08:37] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:08:37] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1700/tokenizer_config.json + +[INFO|2024-12-28 21:08:37] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1700/special_tokens_map.json + +[INFO|2024-12-28 21:08:44] logging.py:157 >> {'loss': 0.5183, 'learning_rate': 4.0293e-06, 'epoch': 2.73} + +[INFO|2024-12-28 21:08:50] logging.py:157 >> {'loss': 0.6680, 'learning_rate': 3.7972e-06, 'epoch': 2.74} + +[INFO|2024-12-28 21:08:56] logging.py:157 >> {'loss': 0.5068, 'learning_rate': 3.5719e-06, 'epoch': 2.74} + +[INFO|2024-12-28 21:09:02] logging.py:157 >> {'loss': 0.5823, 'learning_rate': 3.3534e-06, 'epoch': 2.75} + +[INFO|2024-12-28 21:09:07] logging.py:157 >> {'loss': 0.6111, 'learning_rate': 3.1417e-06, 'epoch': 2.76} + +[INFO|2024-12-28 21:09:13] logging.py:157 >> {'loss': 0.5231, 'learning_rate': 2.9367e-06, 'epoch': 2.77} + +[INFO|2024-12-28 21:09:20] logging.py:157 >> {'loss': 0.5236, 'learning_rate': 2.7386e-06, 'epoch': 2.78} + +[INFO|2024-12-28 21:09:29] logging.py:157 >> {'loss': 0.5657, 'learning_rate': 2.5473e-06, 'epoch': 2.78} + +[INFO|2024-12-28 21:09:34] logging.py:157 >> {'loss': 0.5518, 'learning_rate': 2.3629e-06, 'epoch': 2.79} + +[INFO|2024-12-28 21:09:40] logging.py:157 >> {'loss': 0.4908, 'learning_rate': 2.1852e-06, 'epoch': 2.80} + +[INFO|2024-12-28 21:09:49] logging.py:157 >> {'loss': 0.5459, 'learning_rate': 2.0145e-06, 'epoch': 2.81} + +[INFO|2024-12-28 21:09:56] logging.py:157 >> {'loss': 0.5208, 'learning_rate': 1.8506e-06, 'epoch': 2.82} + +[INFO|2024-12-28 21:10:02] logging.py:157 >> {'loss': 0.5824, 'learning_rate': 1.6936e-06, 'epoch': 2.82} + +[INFO|2024-12-28 21:10:11] logging.py:157 >> {'loss': 0.5512, 'learning_rate': 1.5436e-06, 'epoch': 2.83} + +[INFO|2024-12-28 21:10:18] logging.py:157 >> {'loss': 0.6327, 'learning_rate': 1.4004e-06, 'epoch': 2.84} + +[INFO|2024-12-28 21:10:26] logging.py:157 >> {'loss': 0.5292, 'learning_rate': 1.2641e-06, 'epoch': 2.85} + +[INFO|2024-12-28 21:10:32] logging.py:157 >> {'loss': 0.5692, 'learning_rate': 1.1348e-06, 'epoch': 2.86} + +[INFO|2024-12-28 21:10:39] logging.py:157 >> {'loss': 0.5532, 'learning_rate': 1.0124e-06, 'epoch': 2.86} + +[INFO|2024-12-28 21:10:44] logging.py:157 >> {'loss': 0.5314, 'learning_rate': 8.9701e-07, 'epoch': 2.87} + +[INFO|2024-12-28 21:10:51] logging.py:157 >> {'loss': 0.5240, 'learning_rate': 7.8853e-07, 'epoch': 2.88} + +[INFO|2024-12-28 21:10:51] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1800 + +[INFO|2024-12-28 21:10:51] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:10:51] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:10:52] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1800/tokenizer_config.json + +[INFO|2024-12-28 21:10:52] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1800/special_tokens_map.json + +[INFO|2024-12-28 21:10:58] logging.py:157 >> {'loss': 0.5227, 'learning_rate': 6.8701e-07, 'epoch': 2.89} + +[INFO|2024-12-28 21:11:04] logging.py:157 >> {'loss': 0.5993, 'learning_rate': 5.9247e-07, 'epoch': 2.90} + +[INFO|2024-12-28 21:11:12] logging.py:157 >> {'loss': 0.5618, 'learning_rate': 5.0490e-07, 'epoch': 2.90} + +[INFO|2024-12-28 21:11:18] logging.py:157 >> {'loss': 0.5504, 'learning_rate': 4.2431e-07, 'epoch': 2.91} + +[INFO|2024-12-28 21:11:24] logging.py:157 >> {'loss': 0.5147, 'learning_rate': 3.5071e-07, 'epoch': 2.92} + +[INFO|2024-12-28 21:11:32] logging.py:157 >> {'loss': 0.5564, 'learning_rate': 2.8411e-07, 'epoch': 2.93} + +[INFO|2024-12-28 21:11:40] logging.py:157 >> {'loss': 0.5188, 'learning_rate': 2.2450e-07, 'epoch': 2.94} + +[INFO|2024-12-28 21:11:45] logging.py:157 >> {'loss': 0.5438, 'learning_rate': 1.7190e-07, 'epoch': 2.94} + +[INFO|2024-12-28 21:11:51] logging.py:157 >> {'loss': 0.4962, 'learning_rate': 1.2630e-07, 'epoch': 2.95} + +[INFO|2024-12-28 21:11:58] logging.py:157 >> {'loss': 0.5021, 'learning_rate': 8.7717e-08, 'epoch': 2.96} + +[INFO|2024-12-28 21:12:03] logging.py:157 >> {'loss': 0.5277, 'learning_rate': 5.6142e-08, 'epoch': 2.97} + +[INFO|2024-12-28 21:12:06] logging.py:157 >> {'loss': 0.5764, 'learning_rate': 3.1581e-08, 'epoch': 2.98} + +[INFO|2024-12-28 21:12:14] logging.py:157 >> {'loss': 0.5408, 'learning_rate': 1.4036e-08, 'epoch': 2.98} + +[INFO|2024-12-28 21:12:23] logging.py:157 >> {'loss': 0.5014, 'learning_rate': 3.5092e-09, 'epoch': 2.99} + +[INFO|2024-12-28 21:12:29] logging.py:157 >> {'loss': 0.6182, 'learning_rate': 0.0000e+00, 'epoch': 3.00} + +[INFO|2024-12-28 21:12:29] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1875 + +[INFO|2024-12-28 21:12:29] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:12:29] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:12:30] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1875/tokenizer_config.json + +[INFO|2024-12-28 21:12:30] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/checkpoint-1875/special_tokens_map.json + +[INFO|2024-12-28 21:12:31] trainer.py:2584 >> + +Training completed. Do not forget to share your model on huggingface.co/models =) + + + +[INFO|2024-12-28 21:12:31] trainer.py:3801 >> Saving model checkpoint to saves/Llama-3.2-1B/lora/llama3.2-1b + +[INFO|2024-12-28 21:12:31] configuration_utils.py:679 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/4e20de362430cd3b72f300e6b0f18e50e7166e08/config.json + +[INFO|2024-12-28 21:12:31] configuration_utils.py:746 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 128256 +} + + +[INFO|2024-12-28 21:12:31] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/tokenizer_config.json + +[INFO|2024-12-28 21:12:31] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Llama-3.2-1B/lora/llama3.2-1b/special_tokens_map.json + +[WARNING|2024-12-28 21:12:32] logging.py:162 >> No metric eval_loss to plot. + +[WARNING|2024-12-28 21:12:32] logging.py:162 >> No metric eval_accuracy to plot. + +[INFO|2024-12-28 21:12:32] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields: +{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} + diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61c03bf2b5e0c03056cebf7a58c8c096dafc1ffc --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2065 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9aecdedbe38ee7488418ffeffde0ced048359694 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 1.1764404625814323e+17, + "train_loss": 0.6465437274932861, + "train_runtime": 2365.2475, + "train_samples_per_second": 12.684, + "train_steps_per_second": 0.793 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5fc4e9b9654af7b0ddfbc4bebc46009ed280028d --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,377 @@ +{"current_steps": 5, "total_steps": 234, "loss": 1.0617, "lr": 1.9977477585156252e-05, "epoch": 0.064, "percentage": 2.14, "elapsed_time": "0:00:51", "remaining_time": "0:39:31"} +{"current_steps": 5, "total_steps": 1875, "loss": 1.2049, "lr": 0.0001999964908278481, "epoch": 0.008, "percentage": 0.27, "elapsed_time": "0:00:05", "remaining_time": "0:34:31"} +{"current_steps": 10, "total_steps": 1875, "loss": 0.9333, "lr": 0.00019998596355767805, "epoch": 0.016, "percentage": 0.53, "elapsed_time": "0:00:13", "remaining_time": "0:40:38"} +{"current_steps": 15, "total_steps": 1875, "loss": 0.8671, "lr": 0.00019996841892833, "epoch": 0.024, "percentage": 0.8, "elapsed_time": "0:00:19", "remaining_time": "0:41:10"} +{"current_steps": 20, "total_steps": 1875, "loss": 0.7979, "lr": 0.00019994385817114646, "epoch": 0.032, "percentage": 1.07, "elapsed_time": "0:00:25", "remaining_time": "0:39:02"} +{"current_steps": 25, "total_steps": 1875, "loss": 0.7662, "lr": 0.00019991228300988585, "epoch": 0.04, "percentage": 1.33, "elapsed_time": "0:00:31", "remaining_time": "0:38:18"} +{"current_steps": 30, "total_steps": 1875, "loss": 0.7929, "lr": 0.00019987369566060176, "epoch": 0.048, "percentage": 1.6, "elapsed_time": "0:00:37", "remaining_time": "0:38:54"} +{"current_steps": 35, "total_steps": 1875, "loss": 0.7683, "lr": 0.00019982809883148722, "epoch": 0.056, "percentage": 1.87, "elapsed_time": "0:00:44", "remaining_time": "0:38:41"} +{"current_steps": 40, "total_steps": 1875, "loss": 0.8667, "lr": 0.00019977549572268468, "epoch": 0.064, "percentage": 2.13, "elapsed_time": "0:00:50", "remaining_time": "0:38:42"} +{"current_steps": 45, "total_steps": 1875, "loss": 0.8446, "lr": 0.0001997158900260614, "epoch": 0.072, "percentage": 2.4, "elapsed_time": "0:00:58", "remaining_time": "0:39:46"} +{"current_steps": 50, "total_steps": 1875, "loss": 0.9051, "lr": 0.00019964928592495045, "epoch": 0.08, "percentage": 2.67, "elapsed_time": "0:01:03", "remaining_time": "0:38:40"} +{"current_steps": 55, "total_steps": 1875, "loss": 0.7235, "lr": 0.00019957568809385694, "epoch": 0.088, "percentage": 2.93, "elapsed_time": "0:01:08", "remaining_time": "0:37:45"} +{"current_steps": 60, "total_steps": 1875, "loss": 0.8169, "lr": 0.00019949510169813003, "epoch": 0.096, "percentage": 3.2, "elapsed_time": "0:01:12", "remaining_time": "0:36:28"} +{"current_steps": 65, "total_steps": 1875, "loss": 0.8266, "lr": 0.00019940753239360047, "epoch": 0.104, "percentage": 3.47, "elapsed_time": "0:01:18", "remaining_time": "0:36:17"} +{"current_steps": 70, "total_steps": 1875, "loss": 0.758, "lr": 0.00019931298632618356, "epoch": 0.112, "percentage": 3.73, "elapsed_time": "0:01:23", "remaining_time": "0:35:52"} +{"current_steps": 75, "total_steps": 1875, "loss": 0.7759, "lr": 0.0001992114701314478, "epoch": 0.12, "percentage": 4.0, "elapsed_time": "0:01:30", "remaining_time": "0:36:07"} +{"current_steps": 80, "total_steps": 1875, "loss": 0.7797, "lr": 0.0001991029909341493, "epoch": 0.128, "percentage": 4.27, "elapsed_time": "0:01:37", "remaining_time": "0:36:20"} +{"current_steps": 85, "total_steps": 1875, "loss": 0.7437, "lr": 0.00019898755634773158, "epoch": 0.136, "percentage": 4.53, "elapsed_time": "0:01:43", "remaining_time": "0:36:21"} +{"current_steps": 90, "total_steps": 1875, "loss": 0.8043, "lr": 0.0001988651744737914, "epoch": 0.144, "percentage": 4.8, "elapsed_time": "0:01:50", "remaining_time": "0:36:26"} +{"current_steps": 95, "total_steps": 1875, "loss": 0.7701, "lr": 0.00019873585390151003, "epoch": 0.152, "percentage": 5.07, "elapsed_time": "0:01:57", "remaining_time": "0:36:34"} +{"current_steps": 100, "total_steps": 1875, "loss": 0.709, "lr": 0.0001985996037070505, "epoch": 0.16, "percentage": 5.33, "elapsed_time": "0:02:04", "remaining_time": "0:36:46"} +{"current_steps": 105, "total_steps": 1875, "loss": 0.7377, "lr": 0.00019845643345292054, "epoch": 0.168, "percentage": 5.6, "elapsed_time": "0:02:11", "remaining_time": "0:36:57"} +{"current_steps": 110, "total_steps": 1875, "loss": 0.8352, "lr": 0.00019830635318730154, "epoch": 0.176, "percentage": 5.87, "elapsed_time": "0:02:17", "remaining_time": "0:36:45"} +{"current_steps": 115, "total_steps": 1875, "loss": 0.7738, "lr": 0.0001981493734433433, "epoch": 0.184, "percentage": 6.13, "elapsed_time": "0:02:24", "remaining_time": "0:36:55"} +{"current_steps": 120, "total_steps": 1875, "loss": 0.8067, "lr": 0.0001979855052384247, "epoch": 0.192, "percentage": 6.4, "elapsed_time": "0:02:29", "remaining_time": "0:36:32"} +{"current_steps": 125, "total_steps": 1875, "loss": 0.7456, "lr": 0.00019781476007338058, "epoch": 0.2, "percentage": 6.67, "elapsed_time": "0:02:35", "remaining_time": "0:36:13"} +{"current_steps": 130, "total_steps": 1875, "loss": 0.758, "lr": 0.00019763714993169452, "epoch": 0.208, "percentage": 6.93, "elapsed_time": "0:02:42", "remaining_time": "0:36:20"} +{"current_steps": 135, "total_steps": 1875, "loss": 0.7895, "lr": 0.00019745268727865774, "epoch": 0.216, "percentage": 7.2, "elapsed_time": "0:02:48", "remaining_time": "0:36:10"} +{"current_steps": 140, "total_steps": 1875, "loss": 0.7302, "lr": 0.00019726138506049438, "epoch": 0.224, "percentage": 7.47, "elapsed_time": "0:02:56", "remaining_time": "0:36:26"} +{"current_steps": 145, "total_steps": 1875, "loss": 0.8152, "lr": 0.00019706325670345275, "epoch": 0.232, "percentage": 7.73, "elapsed_time": "0:03:01", "remaining_time": "0:36:11"} +{"current_steps": 150, "total_steps": 1875, "loss": 0.8461, "lr": 0.0001968583161128631, "epoch": 0.24, "percentage": 8.0, "elapsed_time": "0:03:06", "remaining_time": "0:35:45"} +{"current_steps": 155, "total_steps": 1875, "loss": 0.7787, "lr": 0.00019664657767216176, "epoch": 0.248, "percentage": 8.27, "elapsed_time": "0:03:12", "remaining_time": "0:35:34"} +{"current_steps": 160, "total_steps": 1875, "loss": 0.7574, "lr": 0.00019642805624188147, "epoch": 0.256, "percentage": 8.53, "elapsed_time": "0:03:18", "remaining_time": "0:35:27"} +{"current_steps": 165, "total_steps": 1875, "loss": 0.8487, "lr": 0.0001962027671586086, "epoch": 0.264, "percentage": 8.8, "elapsed_time": "0:03:22", "remaining_time": "0:35:00"} +{"current_steps": 170, "total_steps": 1875, "loss": 0.6611, "lr": 0.00019597072623390668, "epoch": 0.272, "percentage": 9.07, "elapsed_time": "0:03:29", "remaining_time": "0:34:56"} +{"current_steps": 175, "total_steps": 1875, "loss": 0.7802, "lr": 0.00019573194975320673, "epoch": 0.28, "percentage": 9.33, "elapsed_time": "0:03:35", "remaining_time": "0:34:55"} +{"current_steps": 180, "total_steps": 1875, "loss": 0.6727, "lr": 0.00019548645447466431, "epoch": 0.288, "percentage": 9.6, "elapsed_time": "0:03:42", "remaining_time": "0:34:56"} +{"current_steps": 185, "total_steps": 1875, "loss": 0.7502, "lr": 0.00019523425762798329, "epoch": 0.296, "percentage": 9.87, "elapsed_time": "0:03:47", "remaining_time": "0:34:38"} +{"current_steps": 190, "total_steps": 1875, "loss": 0.8401, "lr": 0.00019497537691320668, "epoch": 0.304, "percentage": 10.13, "elapsed_time": "0:03:53", "remaining_time": "0:34:30"} +{"current_steps": 195, "total_steps": 1875, "loss": 0.7494, "lr": 0.00019470983049947444, "epoch": 0.312, "percentage": 10.4, "elapsed_time": "0:03:58", "remaining_time": "0:34:16"} +{"current_steps": 200, "total_steps": 1875, "loss": 0.7842, "lr": 0.00019443763702374812, "epoch": 0.32, "percentage": 10.67, "elapsed_time": "0:04:04", "remaining_time": "0:34:11"} +{"current_steps": 205, "total_steps": 1875, "loss": 0.8082, "lr": 0.00019415881558950302, "epoch": 0.328, "percentage": 10.93, "elapsed_time": "0:04:12", "remaining_time": "0:34:15"} +{"current_steps": 210, "total_steps": 1875, "loss": 0.7883, "lr": 0.00019387338576538744, "epoch": 0.336, "percentage": 11.2, "elapsed_time": "0:04:17", "remaining_time": "0:34:01"} +{"current_steps": 215, "total_steps": 1875, "loss": 0.7356, "lr": 0.00019358136758384912, "epoch": 0.344, "percentage": 11.47, "elapsed_time": "0:04:25", "remaining_time": "0:34:13"} +{"current_steps": 220, "total_steps": 1875, "loss": 0.7891, "lr": 0.00019328278153972947, "epoch": 0.352, "percentage": 11.73, "elapsed_time": "0:04:31", "remaining_time": "0:34:04"} +{"current_steps": 225, "total_steps": 1875, "loss": 0.7671, "lr": 0.00019297764858882514, "epoch": 0.36, "percentage": 12.0, "elapsed_time": "0:04:39", "remaining_time": "0:34:06"} +{"current_steps": 230, "total_steps": 1875, "loss": 0.6608, "lr": 0.0001926659901464172, "epoch": 0.368, "percentage": 12.27, "elapsed_time": "0:04:45", "remaining_time": "0:34:04"} +{"current_steps": 235, "total_steps": 1875, "loss": 0.647, "lr": 0.00019234782808576824, "epoch": 0.376, "percentage": 12.53, "elapsed_time": "0:04:54", "remaining_time": "0:34:17"} +{"current_steps": 240, "total_steps": 1875, "loss": 0.729, "lr": 0.00019202318473658705, "epoch": 0.384, "percentage": 12.8, "elapsed_time": "0:05:02", "remaining_time": "0:34:18"} +{"current_steps": 245, "total_steps": 1875, "loss": 0.6713, "lr": 0.00019169208288346166, "epoch": 0.392, "percentage": 13.07, "elapsed_time": "0:05:08", "remaining_time": "0:34:09"} +{"current_steps": 250, "total_steps": 1875, "loss": 0.7049, "lr": 0.0001913545457642601, "epoch": 0.4, "percentage": 13.33, "elapsed_time": "0:05:16", "remaining_time": "0:34:19"} +{"current_steps": 255, "total_steps": 1875, "loss": 0.7419, "lr": 0.00019101059706849957, "epoch": 0.408, "percentage": 13.6, "elapsed_time": "0:05:24", "remaining_time": "0:34:20"} +{"current_steps": 260, "total_steps": 1875, "loss": 0.7148, "lr": 0.00019066026093568378, "epoch": 0.416, "percentage": 13.87, "elapsed_time": "0:05:29", "remaining_time": "0:34:08"} +{"current_steps": 265, "total_steps": 1875, "loss": 0.7493, "lr": 0.00019030356195360874, "epoch": 0.424, "percentage": 14.13, "elapsed_time": "0:05:37", "remaining_time": "0:34:10"} +{"current_steps": 270, "total_steps": 1875, "loss": 0.7652, "lr": 0.0001899405251566371, "epoch": 0.432, "percentage": 14.4, "elapsed_time": "0:05:43", "remaining_time": "0:34:01"} +{"current_steps": 275, "total_steps": 1875, "loss": 0.7438, "lr": 0.0001895711760239413, "epoch": 0.44, "percentage": 14.67, "elapsed_time": "0:05:50", "remaining_time": "0:33:59"} +{"current_steps": 280, "total_steps": 1875, "loss": 0.7683, "lr": 0.0001891955404777151, "epoch": 0.448, "percentage": 14.93, "elapsed_time": "0:05:55", "remaining_time": "0:33:44"} +{"current_steps": 285, "total_steps": 1875, "loss": 0.8115, "lr": 0.00018881364488135448, "epoch": 0.456, "percentage": 15.2, "elapsed_time": "0:06:00", "remaining_time": "0:33:30"} +{"current_steps": 290, "total_steps": 1875, "loss": 0.8335, "lr": 0.00018842551603760724, "epoch": 0.464, "percentage": 15.47, "elapsed_time": "0:06:06", "remaining_time": "0:33:25"} +{"current_steps": 295, "total_steps": 1875, "loss": 0.6933, "lr": 0.00018803118118669202, "epoch": 0.472, "percentage": 15.73, "elapsed_time": "0:06:13", "remaining_time": "0:33:20"} +{"current_steps": 300, "total_steps": 1875, "loss": 0.7515, "lr": 0.00018763066800438636, "epoch": 0.48, "percentage": 16.0, "elapsed_time": "0:06:17", "remaining_time": "0:33:04"} +{"current_steps": 305, "total_steps": 1875, "loss": 0.6931, "lr": 0.0001872240046000844, "epoch": 0.488, "percentage": 16.27, "elapsed_time": "0:06:25", "remaining_time": "0:33:05"} +{"current_steps": 310, "total_steps": 1875, "loss": 0.782, "lr": 0.00018681121951482393, "epoch": 0.496, "percentage": 16.53, "elapsed_time": "0:06:31", "remaining_time": "0:32:58"} +{"current_steps": 315, "total_steps": 1875, "loss": 0.7361, "lr": 0.00018639234171928353, "epoch": 0.504, "percentage": 16.8, "elapsed_time": "0:06:38", "remaining_time": "0:32:52"} +{"current_steps": 320, "total_steps": 1875, "loss": 0.7443, "lr": 0.0001859674006117491, "epoch": 0.512, "percentage": 17.07, "elapsed_time": "0:06:44", "remaining_time": "0:32:43"} +{"current_steps": 325, "total_steps": 1875, "loss": 0.7221, "lr": 0.00018553642601605068, "epoch": 0.52, "percentage": 17.33, "elapsed_time": "0:06:51", "remaining_time": "0:32:41"} +{"current_steps": 330, "total_steps": 1875, "loss": 0.7622, "lr": 0.00018509944817946922, "epoch": 0.528, "percentage": 17.6, "elapsed_time": "0:06:57", "remaining_time": "0:32:33"} +{"current_steps": 335, "total_steps": 1875, "loss": 0.8556, "lr": 0.0001846564977706138, "epoch": 0.536, "percentage": 17.87, "elapsed_time": "0:07:03", "remaining_time": "0:32:25"} +{"current_steps": 340, "total_steps": 1875, "loss": 0.7814, "lr": 0.00018420760587726923, "epoch": 0.544, "percentage": 18.13, "elapsed_time": "0:07:09", "remaining_time": "0:32:17"} +{"current_steps": 345, "total_steps": 1875, "loss": 0.722, "lr": 0.0001837528040042142, "epoch": 0.552, "percentage": 18.4, "elapsed_time": "0:07:18", "remaining_time": "0:32:24"} +{"current_steps": 350, "total_steps": 1875, "loss": 0.7903, "lr": 0.00018329212407100994, "epoch": 0.56, "percentage": 18.67, "elapsed_time": "0:07:22", "remaining_time": "0:32:09"} +{"current_steps": 355, "total_steps": 1875, "loss": 0.6996, "lr": 0.00018282559840976042, "epoch": 0.568, "percentage": 18.93, "elapsed_time": "0:07:28", "remaining_time": "0:31:58"} +{"current_steps": 360, "total_steps": 1875, "loss": 0.773, "lr": 0.00018235325976284275, "epoch": 0.576, "percentage": 19.2, "elapsed_time": "0:07:33", "remaining_time": "0:31:49"} +{"current_steps": 365, "total_steps": 1875, "loss": 0.728, "lr": 0.00018187514128060946, "epoch": 0.584, "percentage": 19.47, "elapsed_time": "0:07:40", "remaining_time": "0:31:43"} +{"current_steps": 370, "total_steps": 1875, "loss": 0.7659, "lr": 0.00018139127651906184, "epoch": 0.592, "percentage": 19.73, "elapsed_time": "0:07:45", "remaining_time": "0:31:33"} +{"current_steps": 375, "total_steps": 1875, "loss": 0.7039, "lr": 0.00018090169943749476, "epoch": 0.6, "percentage": 20.0, "elapsed_time": "0:07:50", "remaining_time": "0:31:21"} +{"current_steps": 380, "total_steps": 1875, "loss": 0.7125, "lr": 0.00018040644439611348, "epoch": 0.608, "percentage": 20.27, "elapsed_time": "0:07:56", "remaining_time": "0:31:14"} +{"current_steps": 385, "total_steps": 1875, "loss": 0.698, "lr": 0.00017990554615362198, "epoch": 0.616, "percentage": 20.53, "elapsed_time": "0:08:03", "remaining_time": "0:31:11"} +{"current_steps": 390, "total_steps": 1875, "loss": 0.8255, "lr": 0.00017939903986478355, "epoch": 0.624, "percentage": 20.8, "elapsed_time": "0:08:08", "remaining_time": "0:30:59"} +{"current_steps": 395, "total_steps": 1875, "loss": 0.6616, "lr": 0.00017888696107795342, "epoch": 0.632, "percentage": 21.07, "elapsed_time": "0:08:13", "remaining_time": "0:30:50"} +{"current_steps": 400, "total_steps": 1875, "loss": 0.7452, "lr": 0.000178369345732584, "epoch": 0.64, "percentage": 21.33, "elapsed_time": "0:08:18", "remaining_time": "0:30:39"} +{"current_steps": 405, "total_steps": 1875, "loss": 0.7652, "lr": 0.00017784623015670238, "epoch": 0.648, "percentage": 21.6, "elapsed_time": "0:08:27", "remaining_time": "0:30:40"} +{"current_steps": 410, "total_steps": 1875, "loss": 0.7793, "lr": 0.00017731765106436073, "epoch": 0.656, "percentage": 21.87, "elapsed_time": "0:08:34", "remaining_time": "0:30:39"} +{"current_steps": 415, "total_steps": 1875, "loss": 0.6875, "lr": 0.00017678364555305978, "epoch": 0.664, "percentage": 22.13, "elapsed_time": "0:08:40", "remaining_time": "0:30:29"} +{"current_steps": 420, "total_steps": 1875, "loss": 0.7465, "lr": 0.0001762442511011448, "epoch": 0.672, "percentage": 22.4, "elapsed_time": "0:08:48", "remaining_time": "0:30:30"} +{"current_steps": 425, "total_steps": 1875, "loss": 0.7205, "lr": 0.00017569950556517566, "epoch": 0.68, "percentage": 22.67, "elapsed_time": "0:08:55", "remaining_time": "0:30:27"} +{"current_steps": 430, "total_steps": 1875, "loss": 0.6589, "lr": 0.00017514944717726962, "epoch": 0.688, "percentage": 22.93, "elapsed_time": "0:09:02", "remaining_time": "0:30:22"} +{"current_steps": 435, "total_steps": 1875, "loss": 0.7035, "lr": 0.00017459411454241822, "epoch": 0.696, "percentage": 23.2, "elapsed_time": "0:09:08", "remaining_time": "0:30:14"} +{"current_steps": 440, "total_steps": 1875, "loss": 0.787, "lr": 0.00017403354663577783, "epoch": 0.704, "percentage": 23.47, "elapsed_time": "0:09:15", "remaining_time": "0:30:10"} +{"current_steps": 445, "total_steps": 1875, "loss": 0.7515, "lr": 0.00017346778279993415, "epoch": 0.712, "percentage": 23.73, "elapsed_time": "0:09:23", "remaining_time": "0:30:09"} +{"current_steps": 450, "total_steps": 1875, "loss": 0.7199, "lr": 0.00017289686274214118, "epoch": 0.72, "percentage": 24.0, "elapsed_time": "0:09:28", "remaining_time": "0:30:01"} +{"current_steps": 455, "total_steps": 1875, "loss": 0.8037, "lr": 0.00017232082653153422, "epoch": 0.728, "percentage": 24.27, "elapsed_time": "0:09:35", "remaining_time": "0:29:55"} +{"current_steps": 460, "total_steps": 1875, "loss": 0.7502, "lr": 0.00017173971459631787, "epoch": 0.736, "percentage": 24.53, "elapsed_time": "0:09:40", "remaining_time": "0:29:46"} +{"current_steps": 465, "total_steps": 1875, "loss": 0.7446, "lr": 0.00017115356772092857, "epoch": 0.744, "percentage": 24.8, "elapsed_time": "0:09:47", "remaining_time": "0:29:42"} +{"current_steps": 470, "total_steps": 1875, "loss": 0.6507, "lr": 0.0001705624270431721, "epoch": 0.752, "percentage": 25.07, "elapsed_time": "0:09:54", "remaining_time": "0:29:36"} +{"current_steps": 475, "total_steps": 1875, "loss": 0.7164, "lr": 0.00016996633405133655, "epoch": 0.76, "percentage": 25.33, "elapsed_time": "0:10:00", "remaining_time": "0:29:29"} +{"current_steps": 480, "total_steps": 1875, "loss": 0.7621, "lr": 0.0001693653305812805, "epoch": 0.768, "percentage": 25.6, "elapsed_time": "0:10:06", "remaining_time": "0:29:23"} +{"current_steps": 485, "total_steps": 1875, "loss": 0.7623, "lr": 0.00016875945881349676, "epoch": 0.776, "percentage": 25.87, "elapsed_time": "0:10:12", "remaining_time": "0:29:16"} +{"current_steps": 490, "total_steps": 1875, "loss": 0.6606, "lr": 0.000168148761270152, "epoch": 0.784, "percentage": 26.13, "elapsed_time": "0:10:20", "remaining_time": "0:29:14"} +{"current_steps": 495, "total_steps": 1875, "loss": 0.6941, "lr": 0.00016753328081210245, "epoch": 0.792, "percentage": 26.4, "elapsed_time": "0:10:28", "remaining_time": "0:29:13"} +{"current_steps": 500, "total_steps": 1875, "loss": 0.6841, "lr": 0.00016691306063588583, "epoch": 0.8, "percentage": 26.67, "elapsed_time": "0:10:33", "remaining_time": "0:29:02"} +{"current_steps": 505, "total_steps": 1875, "loss": 0.6996, "lr": 0.00016628814427068953, "epoch": 0.808, "percentage": 26.93, "elapsed_time": "0:10:41", "remaining_time": "0:28:59"} +{"current_steps": 510, "total_steps": 1875, "loss": 0.7542, "lr": 0.00016565857557529566, "epoch": 0.816, "percentage": 27.2, "elapsed_time": "0:10:48", "remaining_time": "0:28:55"} +{"current_steps": 515, "total_steps": 1875, "loss": 0.7175, "lr": 0.00016502439873500289, "epoch": 0.824, "percentage": 27.47, "elapsed_time": "0:10:54", "remaining_time": "0:28:47"} +{"current_steps": 520, "total_steps": 1875, "loss": 0.7565, "lr": 0.0001643856582585254, "epoch": 0.832, "percentage": 27.73, "elapsed_time": "0:10:59", "remaining_time": "0:28:37"} +{"current_steps": 525, "total_steps": 1875, "loss": 0.7339, "lr": 0.000163742398974869, "epoch": 0.84, "percentage": 28.0, "elapsed_time": "0:11:04", "remaining_time": "0:28:29"} +{"current_steps": 530, "total_steps": 1875, "loss": 0.569, "lr": 0.00016309466603018496, "epoch": 0.848, "percentage": 28.27, "elapsed_time": "0:11:10", "remaining_time": "0:28:22"} +{"current_steps": 535, "total_steps": 1875, "loss": 0.7556, "lr": 0.00016244250488460158, "epoch": 0.856, "percentage": 28.53, "elapsed_time": "0:11:15", "remaining_time": "0:28:11"} +{"current_steps": 540, "total_steps": 1875, "loss": 0.7084, "lr": 0.00016178596130903344, "epoch": 0.864, "percentage": 28.8, "elapsed_time": "0:11:19", "remaining_time": "0:28:00"} +{"current_steps": 545, "total_steps": 1875, "loss": 0.6935, "lr": 0.00016112508138196917, "epoch": 0.872, "percentage": 29.07, "elapsed_time": "0:11:25", "remaining_time": "0:27:53"} +{"current_steps": 550, "total_steps": 1875, "loss": 0.7076, "lr": 0.0001604599114862375, "epoch": 0.88, "percentage": 29.33, "elapsed_time": "0:11:31", "remaining_time": "0:27:44"} +{"current_steps": 555, "total_steps": 1875, "loss": 0.7151, "lr": 0.0001597904983057519, "epoch": 0.888, "percentage": 29.6, "elapsed_time": "0:11:36", "remaining_time": "0:27:37"} +{"current_steps": 560, "total_steps": 1875, "loss": 0.7001, "lr": 0.0001591168888222342, "epoch": 0.896, "percentage": 29.87, "elapsed_time": "0:11:42", "remaining_time": "0:27:29"} +{"current_steps": 565, "total_steps": 1875, "loss": 0.7285, "lr": 0.00015843913031191723, "epoch": 0.904, "percentage": 30.13, "elapsed_time": "0:11:47", "remaining_time": "0:27:19"} +{"current_steps": 570, "total_steps": 1875, "loss": 0.8041, "lr": 0.00015775727034222675, "epoch": 0.912, "percentage": 30.4, "elapsed_time": "0:11:53", "remaining_time": "0:27:13"} +{"current_steps": 575, "total_steps": 1875, "loss": 0.7353, "lr": 0.0001570713567684432, "epoch": 0.92, "percentage": 30.67, "elapsed_time": "0:12:00", "remaining_time": "0:27:08"} +{"current_steps": 580, "total_steps": 1875, "loss": 0.7792, "lr": 0.00015638143773034267, "epoch": 0.928, "percentage": 30.93, "elapsed_time": "0:12:05", "remaining_time": "0:27:00"} +{"current_steps": 585, "total_steps": 1875, "loss": 1.0121, "lr": 0.00015568756164881882, "epoch": 0.936, "percentage": 31.2, "elapsed_time": "0:12:12", "remaining_time": "0:26:55"} +{"current_steps": 590, "total_steps": 1875, "loss": 0.7727, "lr": 0.000154989777222484, "epoch": 0.944, "percentage": 31.47, "elapsed_time": "0:12:18", "remaining_time": "0:26:48"} +{"current_steps": 595, "total_steps": 1875, "loss": 0.741, "lr": 0.00015428813342425177, "epoch": 0.952, "percentage": 31.73, "elapsed_time": "0:12:24", "remaining_time": "0:26:41"} +{"current_steps": 600, "total_steps": 1875, "loss": 0.6919, "lr": 0.00015358267949789966, "epoch": 0.96, "percentage": 32.0, "elapsed_time": "0:12:30", "remaining_time": "0:26:34"} +{"current_steps": 605, "total_steps": 1875, "loss": 0.7163, "lr": 0.00015287346495461315, "epoch": 0.968, "percentage": 32.27, "elapsed_time": "0:12:38", "remaining_time": "0:26:31"} +{"current_steps": 610, "total_steps": 1875, "loss": 0.8152, "lr": 0.0001521605395695108, "epoch": 0.976, "percentage": 32.53, "elapsed_time": "0:12:42", "remaining_time": "0:26:21"} +{"current_steps": 615, "total_steps": 1875, "loss": 0.6709, "lr": 0.00015144395337815064, "epoch": 0.984, "percentage": 32.8, "elapsed_time": "0:12:48", "remaining_time": "0:26:15"} +{"current_steps": 620, "total_steps": 1875, "loss": 0.6527, "lr": 0.00015072375667301893, "epoch": 0.992, "percentage": 33.07, "elapsed_time": "0:12:54", "remaining_time": "0:26:07"} +{"current_steps": 625, "total_steps": 1875, "loss": 0.8194, "lr": 0.00015000000000000001, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:12:59", "remaining_time": "0:25:58"} +{"current_steps": 630, "total_steps": 1875, "loss": 0.6627, "lr": 0.00014927273415482915, "epoch": 1.008, "percentage": 33.6, "elapsed_time": "0:13:04", "remaining_time": "0:25:51"} +{"current_steps": 635, "total_steps": 1875, "loss": 0.6366, "lr": 0.0001485420101795274, "epoch": 1.016, "percentage": 33.87, "elapsed_time": "0:13:10", "remaining_time": "0:25:44"} +{"current_steps": 640, "total_steps": 1875, "loss": 0.6717, "lr": 0.00014780787935881923, "epoch": 1.024, "percentage": 34.13, "elapsed_time": "0:13:15", "remaining_time": "0:25:35"} +{"current_steps": 645, "total_steps": 1875, "loss": 0.6483, "lr": 0.0001470703932165333, "epoch": 1.032, "percentage": 34.4, "elapsed_time": "0:13:20", "remaining_time": "0:25:26"} +{"current_steps": 650, "total_steps": 1875, "loss": 0.6151, "lr": 0.00014632960351198618, "epoch": 1.04, "percentage": 34.67, "elapsed_time": "0:13:25", "remaining_time": "0:25:18"} +{"current_steps": 655, "total_steps": 1875, "loss": 0.6707, "lr": 0.00014558556223635003, "epoch": 1.048, "percentage": 34.93, "elapsed_time": "0:13:32", "remaining_time": "0:25:13"} +{"current_steps": 660, "total_steps": 1875, "loss": 0.6125, "lr": 0.00014483832160900326, "epoch": 1.056, "percentage": 35.2, "elapsed_time": "0:13:37", "remaining_time": "0:25:04"} +{"current_steps": 665, "total_steps": 1875, "loss": 0.6206, "lr": 0.00014408793407386588, "epoch": 1.064, "percentage": 35.47, "elapsed_time": "0:13:42", "remaining_time": "0:24:56"} +{"current_steps": 670, "total_steps": 1875, "loss": 0.6161, "lr": 0.00014333445229571873, "epoch": 1.072, "percentage": 35.73, "elapsed_time": "0:13:49", "remaining_time": "0:24:50"} +{"current_steps": 675, "total_steps": 1875, "loss": 0.6583, "lr": 0.00014257792915650728, "epoch": 1.08, "percentage": 36.0, "elapsed_time": "0:13:54", "remaining_time": "0:24:43"} +{"current_steps": 680, "total_steps": 1875, "loss": 0.6222, "lr": 0.00014181841775163013, "epoch": 1.088, "percentage": 36.27, "elapsed_time": "0:13:59", "remaining_time": "0:24:34"} +{"current_steps": 685, "total_steps": 1875, "loss": 0.716, "lr": 0.0001410559713862128, "epoch": 1.096, "percentage": 36.53, "elapsed_time": "0:14:04", "remaining_time": "0:24:26"} +{"current_steps": 690, "total_steps": 1875, "loss": 0.6198, "lr": 0.00014029064357136628, "epoch": 1.104, "percentage": 36.8, "elapsed_time": "0:14:10", "remaining_time": "0:24:21"} +{"current_steps": 695, "total_steps": 1875, "loss": 0.6389, "lr": 0.00013952248802043165, "epoch": 1.112, "percentage": 37.07, "elapsed_time": "0:14:18", "remaining_time": "0:24:17"} +{"current_steps": 700, "total_steps": 1875, "loss": 0.6842, "lr": 0.0001387515586452103, "epoch": 1.12, "percentage": 37.33, "elapsed_time": "0:14:24", "remaining_time": "0:24:10"} +{"current_steps": 705, "total_steps": 1875, "loss": 0.6071, "lr": 0.00013797790955218014, "epoch": 1.1280000000000001, "percentage": 37.6, "elapsed_time": "0:14:32", "remaining_time": "0:24:08"} +{"current_steps": 710, "total_steps": 1875, "loss": 0.5915, "lr": 0.00013720159503869815, "epoch": 1.1360000000000001, "percentage": 37.87, "elapsed_time": "0:14:40", "remaining_time": "0:24:04"} +{"current_steps": 715, "total_steps": 1875, "loss": 0.6794, "lr": 0.00013642266958918984, "epoch": 1.144, "percentage": 38.13, "elapsed_time": "0:14:46", "remaining_time": "0:23:57"} +{"current_steps": 720, "total_steps": 1875, "loss": 0.6773, "lr": 0.00013564118787132506, "epoch": 1.152, "percentage": 38.4, "elapsed_time": "0:14:51", "remaining_time": "0:23:50"} +{"current_steps": 725, "total_steps": 1875, "loss": 0.668, "lr": 0.00013485720473218154, "epoch": 1.16, "percentage": 38.67, "elapsed_time": "0:14:59", "remaining_time": "0:23:46"} +{"current_steps": 730, "total_steps": 1875, "loss": 0.6997, "lr": 0.0001340707751943952, "epoch": 1.168, "percentage": 38.93, "elapsed_time": "0:15:06", "remaining_time": "0:23:41"} +{"current_steps": 735, "total_steps": 1875, "loss": 0.831, "lr": 0.00013328195445229868, "epoch": 1.176, "percentage": 39.2, "elapsed_time": "0:15:15", "remaining_time": "0:23:39"} +{"current_steps": 740, "total_steps": 1875, "loss": 0.6378, "lr": 0.00013249079786804765, "epoch": 1.184, "percentage": 39.47, "elapsed_time": "0:15:21", "remaining_time": "0:23:33"} +{"current_steps": 745, "total_steps": 1875, "loss": 0.6547, "lr": 0.0001316973609677352, "epoch": 1.192, "percentage": 39.73, "elapsed_time": "0:15:26", "remaining_time": "0:23:25"} +{"current_steps": 750, "total_steps": 1875, "loss": 0.5808, "lr": 0.00013090169943749476, "epoch": 1.2, "percentage": 40.0, "elapsed_time": "0:15:31", "remaining_time": "0:23:17"} +{"current_steps": 755, "total_steps": 1875, "loss": 0.5582, "lr": 0.00013010386911959206, "epoch": 1.208, "percentage": 40.27, "elapsed_time": "0:15:37", "remaining_time": "0:23:10"} +{"current_steps": 760, "total_steps": 1875, "loss": 0.5801, "lr": 0.00012930392600850573, "epoch": 1.216, "percentage": 40.53, "elapsed_time": "0:15:44", "remaining_time": "0:23:05"} +{"current_steps": 765, "total_steps": 1875, "loss": 0.65, "lr": 0.0001285019262469976, "epoch": 1.224, "percentage": 40.8, "elapsed_time": "0:15:50", "remaining_time": "0:22:59"} +{"current_steps": 770, "total_steps": 1875, "loss": 0.6627, "lr": 0.00012769792612217224, "epoch": 1.232, "percentage": 41.07, "elapsed_time": "0:15:57", "remaining_time": "0:22:53"} +{"current_steps": 775, "total_steps": 1875, "loss": 0.5603, "lr": 0.00012689198206152657, "epoch": 1.24, "percentage": 41.33, "elapsed_time": "0:16:03", "remaining_time": "0:22:46"} +{"current_steps": 780, "total_steps": 1875, "loss": 0.6525, "lr": 0.00012608415062898972, "epoch": 1.248, "percentage": 41.6, "elapsed_time": "0:16:08", "remaining_time": "0:22:38"} +{"current_steps": 785, "total_steps": 1875, "loss": 0.6731, "lr": 0.00012527448852095295, "epoch": 1.256, "percentage": 41.87, "elapsed_time": "0:16:14", "remaining_time": "0:22:33"} +{"current_steps": 790, "total_steps": 1875, "loss": 0.6255, "lr": 0.00012446305256229073, "epoch": 1.264, "percentage": 42.13, "elapsed_time": "0:16:21", "remaining_time": "0:22:27"} +{"current_steps": 795, "total_steps": 1875, "loss": 0.6585, "lr": 0.00012364989970237248, "epoch": 1.272, "percentage": 42.4, "elapsed_time": "0:16:28", "remaining_time": "0:22:22"} +{"current_steps": 800, "total_steps": 1875, "loss": 0.5996, "lr": 0.00012283508701106557, "epoch": 1.28, "percentage": 42.67, "elapsed_time": "0:16:35", "remaining_time": "0:22:17"} +{"current_steps": 805, "total_steps": 1875, "loss": 0.6355, "lr": 0.00012201867167473015, "epoch": 1.288, "percentage": 42.93, "elapsed_time": "0:16:44", "remaining_time": "0:22:15"} +{"current_steps": 810, "total_steps": 1875, "loss": 0.6615, "lr": 0.00012120071099220549, "epoch": 1.296, "percentage": 43.2, "elapsed_time": "0:16:50", "remaining_time": "0:22:08"} +{"current_steps": 815, "total_steps": 1875, "loss": 0.6096, "lr": 0.0001203812623707885, "epoch": 1.304, "percentage": 43.47, "elapsed_time": "0:16:58", "remaining_time": "0:22:04"} +{"current_steps": 820, "total_steps": 1875, "loss": 0.5984, "lr": 0.00011956038332220483, "epoch": 1.312, "percentage": 43.73, "elapsed_time": "0:17:05", "remaining_time": "0:22:00"} +{"current_steps": 825, "total_steps": 1875, "loss": 0.5569, "lr": 0.00011873813145857249, "epoch": 1.32, "percentage": 44.0, "elapsed_time": "0:17:14", "remaining_time": "0:21:56"} +{"current_steps": 830, "total_steps": 1875, "loss": 0.7088, "lr": 0.00011791456448835825, "epoch": 1.328, "percentage": 44.27, "elapsed_time": "0:17:20", "remaining_time": "0:21:50"} +{"current_steps": 835, "total_steps": 1875, "loss": 0.6731, "lr": 0.00011708974021232769, "epoch": 1.336, "percentage": 44.53, "elapsed_time": "0:17:26", "remaining_time": "0:21:43"} +{"current_steps": 840, "total_steps": 1875, "loss": 0.6188, "lr": 0.00011626371651948838, "epoch": 1.3439999999999999, "percentage": 44.8, "elapsed_time": "0:17:32", "remaining_time": "0:21:36"} +{"current_steps": 845, "total_steps": 1875, "loss": 0.7004, "lr": 0.00011543655138302714, "epoch": 1.3519999999999999, "percentage": 45.07, "elapsed_time": "0:17:37", "remaining_time": "0:21:29"} +{"current_steps": 850, "total_steps": 1875, "loss": 0.5884, "lr": 0.00011460830285624118, "epoch": 1.3599999999999999, "percentage": 45.33, "elapsed_time": "0:17:45", "remaining_time": "0:21:25"} +{"current_steps": 855, "total_steps": 1875, "loss": 0.5739, "lr": 0.0001137790290684638, "epoch": 1.3679999999999999, "percentage": 45.6, "elapsed_time": "0:17:54", "remaining_time": "0:21:22"} +{"current_steps": 860, "total_steps": 1875, "loss": 0.6435, "lr": 0.00011294878822098469, "epoch": 1.376, "percentage": 45.87, "elapsed_time": "0:17:59", "remaining_time": "0:21:14"} +{"current_steps": 865, "total_steps": 1875, "loss": 0.6897, "lr": 0.00011211763858296507, "epoch": 1.384, "percentage": 46.13, "elapsed_time": "0:18:06", "remaining_time": "0:21:08"} +{"current_steps": 870, "total_steps": 1875, "loss": 0.6641, "lr": 0.00011128563848734816, "epoch": 1.392, "percentage": 46.4, "elapsed_time": "0:18:10", "remaining_time": "0:21:00"} +{"current_steps": 875, "total_steps": 1875, "loss": 0.6273, "lr": 0.00011045284632676536, "epoch": 1.4, "percentage": 46.67, "elapsed_time": "0:18:16", "remaining_time": "0:20:53"} +{"current_steps": 880, "total_steps": 1875, "loss": 0.6437, "lr": 0.00010961932054943778, "epoch": 1.408, "percentage": 46.93, "elapsed_time": "0:18:24", "remaining_time": "0:20:49"} +{"current_steps": 885, "total_steps": 1875, "loss": 0.6345, "lr": 0.00010878511965507434, "epoch": 1.416, "percentage": 47.2, "elapsed_time": "0:18:31", "remaining_time": "0:20:43"} +{"current_steps": 890, "total_steps": 1875, "loss": 0.5913, "lr": 0.00010795030219076599, "epoch": 1.424, "percentage": 47.47, "elapsed_time": "0:18:39", "remaining_time": "0:20:39"} +{"current_steps": 895, "total_steps": 1875, "loss": 0.6482, "lr": 0.00010711492674687671, "epoch": 1.432, "percentage": 47.73, "elapsed_time": "0:18:45", "remaining_time": "0:20:32"} +{"current_steps": 900, "total_steps": 1875, "loss": 0.6165, "lr": 0.00010627905195293135, "epoch": 1.44, "percentage": 48.0, "elapsed_time": "0:18:52", "remaining_time": "0:20:26"} +{"current_steps": 905, "total_steps": 1875, "loss": 0.634, "lr": 0.00010544273647350092, "epoch": 1.448, "percentage": 48.27, "elapsed_time": "0:18:59", "remaining_time": "0:20:21"} +{"current_steps": 910, "total_steps": 1875, "loss": 0.6509, "lr": 0.00010460603900408523, "epoch": 1.456, "percentage": 48.53, "elapsed_time": "0:19:04", "remaining_time": "0:20:13"} +{"current_steps": 915, "total_steps": 1875, "loss": 0.6212, "lr": 0.00010376901826699348, "epoch": 1.464, "percentage": 48.8, "elapsed_time": "0:19:11", "remaining_time": "0:20:07"} +{"current_steps": 920, "total_steps": 1875, "loss": 0.7305, "lr": 0.00010293173300722285, "epoch": 1.472, "percentage": 49.07, "elapsed_time": "0:19:17", "remaining_time": "0:20:01"} +{"current_steps": 925, "total_steps": 1875, "loss": 0.6685, "lr": 0.0001020942419883357, "epoch": 1.48, "percentage": 49.33, "elapsed_time": "0:19:23", "remaining_time": "0:19:54"} +{"current_steps": 930, "total_steps": 1875, "loss": 0.6214, "lr": 0.00010125660398833528, "epoch": 1.488, "percentage": 49.6, "elapsed_time": "0:19:29", "remaining_time": "0:19:48"} +{"current_steps": 935, "total_steps": 1875, "loss": 0.6035, "lr": 0.0001004188777955404, "epoch": 1.496, "percentage": 49.87, "elapsed_time": "0:19:37", "remaining_time": "0:19:43"} +{"current_steps": 940, "total_steps": 1875, "loss": 0.5868, "lr": 9.958112220445963e-05, "epoch": 1.504, "percentage": 50.13, "elapsed_time": "0:19:44", "remaining_time": "0:19:37"} +{"current_steps": 945, "total_steps": 1875, "loss": 0.6003, "lr": 9.874339601166473e-05, "epoch": 1.512, "percentage": 50.4, "elapsed_time": "0:19:50", "remaining_time": "0:19:31"} +{"current_steps": 950, "total_steps": 1875, "loss": 0.5854, "lr": 9.790575801166432e-05, "epoch": 1.52, "percentage": 50.67, "elapsed_time": "0:19:55", "remaining_time": "0:19:24"} +{"current_steps": 955, "total_steps": 1875, "loss": 0.5882, "lr": 9.706826699277718e-05, "epoch": 1.528, "percentage": 50.93, "elapsed_time": "0:20:03", "remaining_time": "0:19:19"} +{"current_steps": 960, "total_steps": 1875, "loss": 0.7187, "lr": 9.623098173300654e-05, "epoch": 1.536, "percentage": 51.2, "elapsed_time": "0:20:08", "remaining_time": "0:19:11"} +{"current_steps": 965, "total_steps": 1875, "loss": 0.6156, "lr": 9.539396099591476e-05, "epoch": 1.544, "percentage": 51.47, "elapsed_time": "0:20:13", "remaining_time": "0:19:04"} +{"current_steps": 970, "total_steps": 1875, "loss": 0.6488, "lr": 9.455726352649911e-05, "epoch": 1.552, "percentage": 51.73, "elapsed_time": "0:20:17", "remaining_time": "0:18:56"} +{"current_steps": 975, "total_steps": 1875, "loss": 0.6601, "lr": 9.372094804706867e-05, "epoch": 1.56, "percentage": 52.0, "elapsed_time": "0:20:24", "remaining_time": "0:18:50"} +{"current_steps": 980, "total_steps": 1875, "loss": 0.5968, "lr": 9.288507325312335e-05, "epoch": 1.568, "percentage": 52.27, "elapsed_time": "0:20:31", "remaining_time": "0:18:44"} +{"current_steps": 985, "total_steps": 1875, "loss": 0.7034, "lr": 9.204969780923403e-05, "epoch": 1.576, "percentage": 52.53, "elapsed_time": "0:20:36", "remaining_time": "0:18:37"} +{"current_steps": 990, "total_steps": 1875, "loss": 0.5973, "lr": 9.121488034492569e-05, "epoch": 1.584, "percentage": 52.8, "elapsed_time": "0:20:41", "remaining_time": "0:18:29"} +{"current_steps": 995, "total_steps": 1875, "loss": 0.7877, "lr": 9.038067945056227e-05, "epoch": 1.592, "percentage": 53.07, "elapsed_time": "0:20:45", "remaining_time": "0:18:21"} +{"current_steps": 1000, "total_steps": 1875, "loss": 0.644, "lr": 8.954715367323468e-05, "epoch": 1.6, "percentage": 53.33, "elapsed_time": "0:20:51", "remaining_time": "0:18:15"} +{"current_steps": 1005, "total_steps": 1875, "loss": 0.6678, "lr": 8.871436151265184e-05, "epoch": 1.608, "percentage": 53.6, "elapsed_time": "0:20:58", "remaining_time": "0:18:09"} +{"current_steps": 1010, "total_steps": 1875, "loss": 0.6088, "lr": 8.788236141703498e-05, "epoch": 1.616, "percentage": 53.87, "elapsed_time": "0:21:03", "remaining_time": "0:18:02"} +{"current_steps": 1015, "total_steps": 1875, "loss": 0.6219, "lr": 8.705121177901532e-05, "epoch": 1.624, "percentage": 54.13, "elapsed_time": "0:21:10", "remaining_time": "0:17:56"} +{"current_steps": 1020, "total_steps": 1875, "loss": 0.6698, "lr": 8.62209709315362e-05, "epoch": 1.6320000000000001, "percentage": 54.4, "elapsed_time": "0:21:16", "remaining_time": "0:17:49"} +{"current_steps": 1025, "total_steps": 1875, "loss": 0.6207, "lr": 8.539169714375885e-05, "epoch": 1.6400000000000001, "percentage": 54.67, "elapsed_time": "0:21:23", "remaining_time": "0:17:44"} +{"current_steps": 1030, "total_steps": 1875, "loss": 0.626, "lr": 8.456344861697289e-05, "epoch": 1.6480000000000001, "percentage": 54.93, "elapsed_time": "0:21:30", "remaining_time": "0:17:39"} +{"current_steps": 1035, "total_steps": 1875, "loss": 0.6972, "lr": 8.373628348051165e-05, "epoch": 1.6560000000000001, "percentage": 55.2, "elapsed_time": "0:21:36", "remaining_time": "0:17:32"} +{"current_steps": 1040, "total_steps": 1875, "loss": 0.6282, "lr": 8.291025978767235e-05, "epoch": 1.6640000000000001, "percentage": 55.47, "elapsed_time": "0:21:41", "remaining_time": "0:17:25"} +{"current_steps": 1045, "total_steps": 1875, "loss": 0.6219, "lr": 8.208543551164178e-05, "epoch": 1.6720000000000002, "percentage": 55.73, "elapsed_time": "0:21:47", "remaining_time": "0:17:18"} +{"current_steps": 1050, "total_steps": 1875, "loss": 0.622, "lr": 8.126186854142752e-05, "epoch": 1.6800000000000002, "percentage": 56.0, "elapsed_time": "0:21:54", "remaining_time": "0:17:12"} +{"current_steps": 1055, "total_steps": 1875, "loss": 0.5801, "lr": 8.04396166777952e-05, "epoch": 1.688, "percentage": 56.27, "elapsed_time": "0:22:01", "remaining_time": "0:17:06"} +{"current_steps": 1060, "total_steps": 1875, "loss": 0.598, "lr": 7.961873762921153e-05, "epoch": 1.696, "percentage": 56.53, "elapsed_time": "0:22:07", "remaining_time": "0:17:00"} +{"current_steps": 1065, "total_steps": 1875, "loss": 0.699, "lr": 7.879928900779456e-05, "epoch": 1.704, "percentage": 56.8, "elapsed_time": "0:22:12", "remaining_time": "0:16:53"} +{"current_steps": 1070, "total_steps": 1875, "loss": 0.5882, "lr": 7.798132832526986e-05, "epoch": 1.712, "percentage": 57.07, "elapsed_time": "0:22:17", "remaining_time": "0:16:46"} +{"current_steps": 1075, "total_steps": 1875, "loss": 0.5321, "lr": 7.716491298893442e-05, "epoch": 1.72, "percentage": 57.33, "elapsed_time": "0:22:25", "remaining_time": "0:16:41"} +{"current_steps": 1080, "total_steps": 1875, "loss": 0.6647, "lr": 7.635010029762756e-05, "epoch": 1.728, "percentage": 57.6, "elapsed_time": "0:22:33", "remaining_time": "0:16:36"} +{"current_steps": 1085, "total_steps": 1875, "loss": 0.628, "lr": 7.553694743770928e-05, "epoch": 1.736, "percentage": 57.87, "elapsed_time": "0:22:40", "remaining_time": "0:16:30"} +{"current_steps": 1090, "total_steps": 1875, "loss": 0.6262, "lr": 7.472551147904708e-05, "epoch": 1.744, "percentage": 58.13, "elapsed_time": "0:22:46", "remaining_time": "0:16:24"} +{"current_steps": 1095, "total_steps": 1875, "loss": 0.6131, "lr": 7.391584937101033e-05, "epoch": 1.752, "percentage": 58.4, "elapsed_time": "0:22:53", "remaining_time": "0:16:18"} +{"current_steps": 1100, "total_steps": 1875, "loss": 0.6494, "lr": 7.310801793847344e-05, "epoch": 1.76, "percentage": 58.67, "elapsed_time": "0:22:58", "remaining_time": "0:16:11"} +{"current_steps": 1105, "total_steps": 1875, "loss": 0.5514, "lr": 7.230207387782776e-05, "epoch": 1.768, "percentage": 58.93, "elapsed_time": "0:23:06", "remaining_time": "0:16:06"} +{"current_steps": 1110, "total_steps": 1875, "loss": 0.5823, "lr": 7.149807375300239e-05, "epoch": 1.776, "percentage": 59.2, "elapsed_time": "0:23:14", "remaining_time": "0:16:01"} +{"current_steps": 1115, "total_steps": 1875, "loss": 0.7207, "lr": 7.069607399149428e-05, "epoch": 1.784, "percentage": 59.47, "elapsed_time": "0:23:20", "remaining_time": "0:15:54"} +{"current_steps": 1120, "total_steps": 1875, "loss": 0.7006, "lr": 6.989613088040796e-05, "epoch": 1.792, "percentage": 59.73, "elapsed_time": "0:23:26", "remaining_time": "0:15:48"} +{"current_steps": 1125, "total_steps": 1875, "loss": 0.6222, "lr": 6.909830056250527e-05, "epoch": 1.8, "percentage": 60.0, "elapsed_time": "0:23:32", "remaining_time": "0:15:41"} +{"current_steps": 1130, "total_steps": 1875, "loss": 0.6569, "lr": 6.830263903226483e-05, "epoch": 1.808, "percentage": 60.27, "elapsed_time": "0:23:37", "remaining_time": "0:15:34"} +{"current_steps": 1135, "total_steps": 1875, "loss": 0.543, "lr": 6.750920213195238e-05, "epoch": 1.8159999999999998, "percentage": 60.53, "elapsed_time": "0:23:45", "remaining_time": "0:15:29"} +{"current_steps": 1140, "total_steps": 1875, "loss": 0.6334, "lr": 6.671804554770135e-05, "epoch": 1.8239999999999998, "percentage": 60.8, "elapsed_time": "0:23:51", "remaining_time": "0:15:23"} +{"current_steps": 1145, "total_steps": 1875, "loss": 0.6701, "lr": 6.592922480560483e-05, "epoch": 1.8319999999999999, "percentage": 61.07, "elapsed_time": "0:24:00", "remaining_time": "0:15:18"} +{"current_steps": 1150, "total_steps": 1875, "loss": 0.6216, "lr": 6.51427952678185e-05, "epoch": 1.8399999999999999, "percentage": 61.33, "elapsed_time": "0:24:07", "remaining_time": "0:15:12"} +{"current_steps": 1155, "total_steps": 1875, "loss": 0.5877, "lr": 6.435881212867493e-05, "epoch": 1.8479999999999999, "percentage": 61.6, "elapsed_time": "0:24:14", "remaining_time": "0:15:06"} +{"current_steps": 1160, "total_steps": 1875, "loss": 0.6256, "lr": 6.357733041081018e-05, "epoch": 1.8559999999999999, "percentage": 61.87, "elapsed_time": "0:24:21", "remaining_time": "0:15:00"} +{"current_steps": 1165, "total_steps": 1875, "loss": 0.7062, "lr": 6.27984049613019e-05, "epoch": 1.8639999999999999, "percentage": 62.13, "elapsed_time": "0:24:26", "remaining_time": "0:14:53"} +{"current_steps": 1170, "total_steps": 1875, "loss": 0.6304, "lr": 6.20220904478199e-05, "epoch": 1.8719999999999999, "percentage": 62.4, "elapsed_time": "0:24:32", "remaining_time": "0:14:47"} +{"current_steps": 1175, "total_steps": 1875, "loss": 0.7695, "lr": 6.12484413547897e-05, "epoch": 1.88, "percentage": 62.67, "elapsed_time": "0:24:36", "remaining_time": "0:14:39"} +{"current_steps": 1180, "total_steps": 1875, "loss": 0.5723, "lr": 6.047751197956838e-05, "epoch": 1.888, "percentage": 62.93, "elapsed_time": "0:24:43", "remaining_time": "0:14:33"} +{"current_steps": 1185, "total_steps": 1875, "loss": 0.6847, "lr": 5.9709356428633746e-05, "epoch": 1.896, "percentage": 63.2, "elapsed_time": "0:24:48", "remaining_time": "0:14:26"} +{"current_steps": 1190, "total_steps": 1875, "loss": 0.6618, "lr": 5.8944028613787206e-05, "epoch": 1.904, "percentage": 63.47, "elapsed_time": "0:24:55", "remaining_time": "0:14:20"} +{"current_steps": 1195, "total_steps": 1875, "loss": 0.6275, "lr": 5.818158224836987e-05, "epoch": 1.912, "percentage": 63.73, "elapsed_time": "0:25:01", "remaining_time": "0:14:14"} +{"current_steps": 1200, "total_steps": 1875, "loss": 0.5617, "lr": 5.7422070843492734e-05, "epoch": 1.92, "percentage": 64.0, "elapsed_time": "0:25:09", "remaining_time": "0:14:08"} +{"current_steps": 1205, "total_steps": 1875, "loss": 0.6278, "lr": 5.666554770428129e-05, "epoch": 1.928, "percentage": 64.27, "elapsed_time": "0:25:15", "remaining_time": "0:14:02"} +{"current_steps": 1210, "total_steps": 1875, "loss": 0.6713, "lr": 5.591206592613416e-05, "epoch": 1.936, "percentage": 64.53, "elapsed_time": "0:25:20", "remaining_time": "0:13:55"} +{"current_steps": 1215, "total_steps": 1875, "loss": 0.6113, "lr": 5.5161678390996796e-05, "epoch": 1.944, "percentage": 64.8, "elapsed_time": "0:25:25", "remaining_time": "0:13:48"} +{"current_steps": 1220, "total_steps": 1875, "loss": 0.5587, "lr": 5.441443776365003e-05, "epoch": 1.952, "percentage": 65.07, "elapsed_time": "0:25:31", "remaining_time": "0:13:42"} +{"current_steps": 1225, "total_steps": 1875, "loss": 0.5601, "lr": 5.3670396488013854e-05, "epoch": 1.96, "percentage": 65.33, "elapsed_time": "0:25:38", "remaining_time": "0:13:36"} +{"current_steps": 1230, "total_steps": 1875, "loss": 0.5941, "lr": 5.292960678346675e-05, "epoch": 1.968, "percentage": 65.6, "elapsed_time": "0:25:43", "remaining_time": "0:13:29"} +{"current_steps": 1235, "total_steps": 1875, "loss": 0.6285, "lr": 5.2192120641180786e-05, "epoch": 1.976, "percentage": 65.87, "elapsed_time": "0:25:49", "remaining_time": "0:13:22"} +{"current_steps": 1240, "total_steps": 1875, "loss": 0.6516, "lr": 5.145798982047261e-05, "epoch": 1.984, "percentage": 66.13, "elapsed_time": "0:25:55", "remaining_time": "0:13:16"} +{"current_steps": 1245, "total_steps": 1875, "loss": 0.5904, "lr": 5.072726584517086e-05, "epoch": 1.992, "percentage": 66.4, "elapsed_time": "0:26:05", "remaining_time": "0:13:12"} +{"current_steps": 1250, "total_steps": 1875, "loss": 0.619, "lr": 5.000000000000002e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:26:11", "remaining_time": "0:13:05"} +{"current_steps": 1255, "total_steps": 1875, "loss": 0.6058, "lr": 4.927624332698109e-05, "epoch": 2.008, "percentage": 66.93, "elapsed_time": "0:26:15", "remaining_time": "0:12:58"} +{"current_steps": 1260, "total_steps": 1875, "loss": 0.6248, "lr": 4.8556046621849346e-05, "epoch": 2.016, "percentage": 67.2, "elapsed_time": "0:26:20", "remaining_time": "0:12:51"} +{"current_steps": 1265, "total_steps": 1875, "loss": 0.5247, "lr": 4.783946043048923e-05, "epoch": 2.024, "percentage": 67.47, "elapsed_time": "0:26:27", "remaining_time": "0:12:45"} +{"current_steps": 1270, "total_steps": 1875, "loss": 0.5439, "lr": 4.712653504538683e-05, "epoch": 2.032, "percentage": 67.73, "elapsed_time": "0:26:34", "remaining_time": "0:12:39"} +{"current_steps": 1275, "total_steps": 1875, "loss": 0.4491, "lr": 4.6417320502100316e-05, "epoch": 2.04, "percentage": 68.0, "elapsed_time": "0:26:40", "remaining_time": "0:12:33"} +{"current_steps": 1280, "total_steps": 1875, "loss": 0.52, "lr": 4.5711866575748276e-05, "epoch": 2.048, "percentage": 68.27, "elapsed_time": "0:26:48", "remaining_time": "0:12:27"} +{"current_steps": 1285, "total_steps": 1875, "loss": 0.5259, "lr": 4.501022277751602e-05, "epoch": 2.056, "percentage": 68.53, "elapsed_time": "0:26:55", "remaining_time": "0:12:21"} +{"current_steps": 1290, "total_steps": 1875, "loss": 0.5025, "lr": 4.431243835118124e-05, "epoch": 2.064, "percentage": 68.8, "elapsed_time": "0:27:02", "remaining_time": "0:12:15"} +{"current_steps": 1295, "total_steps": 1875, "loss": 0.4772, "lr": 4.361856226965733e-05, "epoch": 2.072, "percentage": 69.07, "elapsed_time": "0:27:08", "remaining_time": "0:12:09"} +{"current_steps": 1300, "total_steps": 1875, "loss": 0.5945, "lr": 4.2928643231556844e-05, "epoch": 2.08, "percentage": 69.33, "elapsed_time": "0:27:14", "remaining_time": "0:12:02"} +{"current_steps": 1305, "total_steps": 1875, "loss": 0.4813, "lr": 4.224272965777326e-05, "epoch": 2.088, "percentage": 69.6, "elapsed_time": "0:27:22", "remaining_time": "0:11:57"} +{"current_steps": 1310, "total_steps": 1875, "loss": 0.5315, "lr": 4.15608696880828e-05, "epoch": 2.096, "percentage": 69.87, "elapsed_time": "0:27:28", "remaining_time": "0:11:51"} +{"current_steps": 1315, "total_steps": 1875, "loss": 0.5591, "lr": 4.08831111777658e-05, "epoch": 2.104, "percentage": 70.13, "elapsed_time": "0:27:34", "remaining_time": "0:11:44"} +{"current_steps": 1320, "total_steps": 1875, "loss": 0.605, "lr": 4.020950169424815e-05, "epoch": 2.112, "percentage": 70.4, "elapsed_time": "0:27:40", "remaining_time": "0:11:37"} +{"current_steps": 1325, "total_steps": 1875, "loss": 0.4955, "lr": 3.954008851376252e-05, "epoch": 2.12, "percentage": 70.67, "elapsed_time": "0:27:47", "remaining_time": "0:11:32"} +{"current_steps": 1330, "total_steps": 1875, "loss": 0.5757, "lr": 3.887491861803085e-05, "epoch": 2.128, "percentage": 70.93, "elapsed_time": "0:27:52", "remaining_time": "0:11:25"} +{"current_steps": 1335, "total_steps": 1875, "loss": 0.5313, "lr": 3.821403869096658e-05, "epoch": 2.136, "percentage": 71.2, "elapsed_time": "0:27:59", "remaining_time": "0:11:19"} +{"current_steps": 1340, "total_steps": 1875, "loss": 0.5904, "lr": 3.755749511539845e-05, "epoch": 2.144, "percentage": 71.47, "elapsed_time": "0:28:06", "remaining_time": "0:11:13"} +{"current_steps": 1345, "total_steps": 1875, "loss": 0.4679, "lr": 3.690533396981504e-05, "epoch": 2.152, "percentage": 71.73, "elapsed_time": "0:28:12", "remaining_time": "0:11:06"} +{"current_steps": 1350, "total_steps": 1875, "loss": 0.5235, "lr": 3.6257601025131026e-05, "epoch": 2.16, "percentage": 72.0, "elapsed_time": "0:28:17", "remaining_time": "0:10:59"} +{"current_steps": 1355, "total_steps": 1875, "loss": 0.5797, "lr": 3.561434174147463e-05, "epoch": 2.168, "percentage": 72.27, "elapsed_time": "0:28:21", "remaining_time": "0:10:52"} +{"current_steps": 1360, "total_steps": 1875, "loss": 0.5772, "lr": 3.497560126499709e-05, "epoch": 2.176, "percentage": 72.53, "elapsed_time": "0:28:26", "remaining_time": "0:10:46"} +{"current_steps": 1365, "total_steps": 1875, "loss": 0.5316, "lr": 3.4341424424704375e-05, "epoch": 2.184, "percentage": 72.8, "elapsed_time": "0:28:32", "remaining_time": "0:10:39"} +{"current_steps": 1370, "total_steps": 1875, "loss": 0.5646, "lr": 3.371185572931048e-05, "epoch": 2.192, "percentage": 73.07, "elapsed_time": "0:28:38", "remaining_time": "0:10:33"} +{"current_steps": 1375, "total_steps": 1875, "loss": 0.5431, "lr": 3.308693936411421e-05, "epoch": 2.2, "percentage": 73.33, "elapsed_time": "0:28:43", "remaining_time": "0:10:26"} +{"current_steps": 1380, "total_steps": 1875, "loss": 0.5403, "lr": 3.246671918789755e-05, "epoch": 2.208, "percentage": 73.6, "elapsed_time": "0:28:48", "remaining_time": "0:10:20"} +{"current_steps": 1385, "total_steps": 1875, "loss": 0.5329, "lr": 3.1851238729848034e-05, "epoch": 2.216, "percentage": 73.87, "elapsed_time": "0:28:53", "remaining_time": "0:10:13"} +{"current_steps": 1390, "total_steps": 1875, "loss": 0.5696, "lr": 3.124054118650327e-05, "epoch": 2.224, "percentage": 74.13, "elapsed_time": "0:29:01", "remaining_time": "0:10:07"} +{"current_steps": 1395, "total_steps": 1875, "loss": 0.59, "lr": 3.063466941871952e-05, "epoch": 2.232, "percentage": 74.4, "elapsed_time": "0:29:07", "remaining_time": "0:10:01"} +{"current_steps": 1400, "total_steps": 1875, "loss": 0.5116, "lr": 3.0033665948663448e-05, "epoch": 2.24, "percentage": 74.67, "elapsed_time": "0:29:14", "remaining_time": "0:09:55"} +{"current_steps": 1405, "total_steps": 1875, "loss": 0.5783, "lr": 2.9437572956827964e-05, "epoch": 2.248, "percentage": 74.93, "elapsed_time": "0:29:21", "remaining_time": "0:09:49"} +{"current_steps": 1410, "total_steps": 1875, "loss": 0.6259, "lr": 2.8846432279071467e-05, "epoch": 2.2560000000000002, "percentage": 75.2, "elapsed_time": "0:29:27", "remaining_time": "0:09:42"} +{"current_steps": 1415, "total_steps": 1875, "loss": 0.5759, "lr": 2.826028540368215e-05, "epoch": 2.2640000000000002, "percentage": 75.47, "elapsed_time": "0:29:35", "remaining_time": "0:09:37"} +{"current_steps": 1420, "total_steps": 1875, "loss": 0.497, "lr": 2.7679173468465812e-05, "epoch": 2.2720000000000002, "percentage": 75.73, "elapsed_time": "0:29:41", "remaining_time": "0:09:30"} +{"current_steps": 1425, "total_steps": 1875, "loss": 0.619, "lr": 2.7103137257858868e-05, "epoch": 2.2800000000000002, "percentage": 76.0, "elapsed_time": "0:29:47", "remaining_time": "0:09:24"} +{"current_steps": 1430, "total_steps": 1875, "loss": 0.5858, "lr": 2.6532217200065858e-05, "epoch": 2.288, "percentage": 76.27, "elapsed_time": "0:29:52", "remaining_time": "0:09:17"} +{"current_steps": 1435, "total_steps": 1875, "loss": 0.6291, "lr": 2.5966453364222186e-05, "epoch": 2.296, "percentage": 76.53, "elapsed_time": "0:29:58", "remaining_time": "0:09:11"} +{"current_steps": 1440, "total_steps": 1875, "loss": 0.6418, "lr": 2.540588545758179e-05, "epoch": 2.304, "percentage": 76.8, "elapsed_time": "0:30:04", "remaining_time": "0:09:05"} +{"current_steps": 1445, "total_steps": 1875, "loss": 0.5483, "lr": 2.48505528227304e-05, "epoch": 2.312, "percentage": 77.07, "elapsed_time": "0:30:11", "remaining_time": "0:08:59"} +{"current_steps": 1450, "total_steps": 1875, "loss": 0.6071, "lr": 2.4300494434824373e-05, "epoch": 2.32, "percentage": 77.33, "elapsed_time": "0:30:18", "remaining_time": "0:08:53"} +{"current_steps": 1455, "total_steps": 1875, "loss": 0.5099, "lr": 2.37557488988552e-05, "epoch": 2.328, "percentage": 77.6, "elapsed_time": "0:30:25", "remaining_time": "0:08:47"} +{"current_steps": 1460, "total_steps": 1875, "loss": 0.5186, "lr": 2.321635444694028e-05, "epoch": 2.336, "percentage": 77.87, "elapsed_time": "0:30:30", "remaining_time": "0:08:40"} +{"current_steps": 1465, "total_steps": 1875, "loss": 0.5043, "lr": 2.2682348935639274e-05, "epoch": 2.344, "percentage": 78.13, "elapsed_time": "0:30:36", "remaining_time": "0:08:33"} +{"current_steps": 1470, "total_steps": 1875, "loss": 0.61, "lr": 2.2153769843297667e-05, "epoch": 2.352, "percentage": 78.4, "elapsed_time": "0:30:42", "remaining_time": "0:08:27"} +{"current_steps": 1475, "total_steps": 1875, "loss": 0.5987, "lr": 2.163065426741603e-05, "epoch": 2.36, "percentage": 78.67, "elapsed_time": "0:30:50", "remaining_time": "0:08:21"} +{"current_steps": 1480, "total_steps": 1875, "loss": 0.5212, "lr": 2.1113038922046602e-05, "epoch": 2.368, "percentage": 78.93, "elapsed_time": "0:30:57", "remaining_time": "0:08:15"} +{"current_steps": 1485, "total_steps": 1875, "loss": 0.4796, "lr": 2.0600960135216462e-05, "epoch": 2.376, "percentage": 79.2, "elapsed_time": "0:31:04", "remaining_time": "0:08:09"} +{"current_steps": 1490, "total_steps": 1875, "loss": 0.4844, "lr": 2.009445384637805e-05, "epoch": 2.384, "percentage": 79.47, "elapsed_time": "0:31:11", "remaining_time": "0:08:03"} +{"current_steps": 1495, "total_steps": 1875, "loss": 0.5085, "lr": 1.9593555603886538e-05, "epoch": 2.392, "percentage": 79.73, "elapsed_time": "0:31:16", "remaining_time": "0:07:57"} +{"current_steps": 1500, "total_steps": 1875, "loss": 0.4839, "lr": 1.9098300562505266e-05, "epoch": 2.4, "percentage": 80.0, "elapsed_time": "0:31:23", "remaining_time": "0:07:50"} +{"current_steps": 1505, "total_steps": 1875, "loss": 0.5715, "lr": 1.8608723480938206e-05, "epoch": 2.408, "percentage": 80.27, "elapsed_time": "0:31:32", "remaining_time": "0:07:45"} +{"current_steps": 1510, "total_steps": 1875, "loss": 0.5266, "lr": 1.812485871939056e-05, "epoch": 2.416, "percentage": 80.53, "elapsed_time": "0:31:36", "remaining_time": "0:07:38"} +{"current_steps": 1515, "total_steps": 1875, "loss": 0.5422, "lr": 1.7646740237157256e-05, "epoch": 2.424, "percentage": 80.8, "elapsed_time": "0:31:43", "remaining_time": "0:07:32"} +{"current_steps": 1520, "total_steps": 1875, "loss": 0.5553, "lr": 1.7174401590239587e-05, "epoch": 2.432, "percentage": 81.07, "elapsed_time": "0:31:49", "remaining_time": "0:07:25"} +{"current_steps": 1525, "total_steps": 1875, "loss": 0.5765, "lr": 1.6707875928990058e-05, "epoch": 2.44, "percentage": 81.33, "elapsed_time": "0:31:57", "remaining_time": "0:07:20"} +{"current_steps": 1530, "total_steps": 1875, "loss": 0.549, "lr": 1.6247195995785837e-05, "epoch": 2.448, "percentage": 81.6, "elapsed_time": "0:32:03", "remaining_time": "0:07:13"} +{"current_steps": 1535, "total_steps": 1875, "loss": 0.4876, "lr": 1.579239412273078e-05, "epoch": 2.456, "percentage": 81.87, "elapsed_time": "0:32:08", "remaining_time": "0:07:07"} +{"current_steps": 1540, "total_steps": 1875, "loss": 0.5546, "lr": 1.5343502229386207e-05, "epoch": 2.464, "percentage": 82.13, "elapsed_time": "0:32:13", "remaining_time": "0:07:00"} +{"current_steps": 1545, "total_steps": 1875, "loss": 0.5356, "lr": 1.4900551820530828e-05, "epoch": 2.472, "percentage": 82.4, "elapsed_time": "0:32:18", "remaining_time": "0:06:54"} +{"current_steps": 1550, "total_steps": 1875, "loss": 0.5142, "lr": 1.4463573983949341e-05, "epoch": 2.48, "percentage": 82.67, "elapsed_time": "0:32:25", "remaining_time": "0:06:47"} +{"current_steps": 1555, "total_steps": 1875, "loss": 0.6054, "lr": 1.40325993882509e-05, "epoch": 2.488, "percentage": 82.93, "elapsed_time": "0:32:29", "remaining_time": "0:06:41"} +{"current_steps": 1560, "total_steps": 1875, "loss": 0.5294, "lr": 1.3607658280716473e-05, "epoch": 2.496, "percentage": 83.2, "elapsed_time": "0:32:35", "remaining_time": "0:06:34"} +{"current_steps": 1565, "total_steps": 1875, "loss": 0.5294, "lr": 1.3188780485176088e-05, "epoch": 2.504, "percentage": 83.47, "elapsed_time": "0:32:40", "remaining_time": "0:06:28"} +{"current_steps": 1570, "total_steps": 1875, "loss": 0.4905, "lr": 1.2775995399915631e-05, "epoch": 2.512, "percentage": 83.73, "elapsed_time": "0:32:47", "remaining_time": "0:06:22"} +{"current_steps": 1575, "total_steps": 1875, "loss": 0.5186, "lr": 1.2369331995613665e-05, "epoch": 2.52, "percentage": 84.0, "elapsed_time": "0:32:54", "remaining_time": "0:06:16"} +{"current_steps": 1580, "total_steps": 1875, "loss": 0.4909, "lr": 1.196881881330798e-05, "epoch": 2.528, "percentage": 84.27, "elapsed_time": "0:33:00", "remaining_time": "0:06:09"} +{"current_steps": 1585, "total_steps": 1875, "loss": 0.5303, "lr": 1.1574483962392767e-05, "epoch": 2.536, "percentage": 84.53, "elapsed_time": "0:33:05", "remaining_time": "0:06:03"} +{"current_steps": 1590, "total_steps": 1875, "loss": 0.5169, "lr": 1.1186355118645554e-05, "epoch": 2.544, "percentage": 84.8, "elapsed_time": "0:33:11", "remaining_time": "0:05:57"} +{"current_steps": 1595, "total_steps": 1875, "loss": 0.5339, "lr": 1.0804459522284926e-05, "epoch": 2.552, "percentage": 85.07, "elapsed_time": "0:33:19", "remaining_time": "0:05:50"} +{"current_steps": 1600, "total_steps": 1875, "loss": 0.5283, "lr": 1.042882397605871e-05, "epoch": 2.56, "percentage": 85.33, "elapsed_time": "0:33:23", "remaining_time": "0:05:44"} +{"current_steps": 1605, "total_steps": 1875, "loss": 0.5576, "lr": 1.0059474843362892e-05, "epoch": 2.568, "percentage": 85.6, "elapsed_time": "0:33:30", "remaining_time": "0:05:38"} +{"current_steps": 1610, "total_steps": 1875, "loss": 0.5136, "lr": 9.696438046391288e-06, "epoch": 2.576, "percentage": 85.87, "elapsed_time": "0:33:36", "remaining_time": "0:05:31"} +{"current_steps": 1615, "total_steps": 1875, "loss": 0.5885, "lr": 9.339739064316233e-06, "epoch": 2.584, "percentage": 86.13, "elapsed_time": "0:33:43", "remaining_time": "0:05:25"} +{"current_steps": 1620, "total_steps": 1875, "loss": 0.5, "lr": 8.989402931500434e-06, "epoch": 2.592, "percentage": 86.4, "elapsed_time": "0:33:51", "remaining_time": "0:05:19"} +{"current_steps": 1625, "total_steps": 1875, "loss": 0.5325, "lr": 8.645454235739903e-06, "epoch": 2.6, "percentage": 86.67, "elapsed_time": "0:33:58", "remaining_time": "0:05:13"} +{"current_steps": 1630, "total_steps": 1875, "loss": 0.5772, "lr": 8.307917116538378e-06, "epoch": 2.608, "percentage": 86.93, "elapsed_time": "0:34:04", "remaining_time": "0:05:07"} +{"current_steps": 1635, "total_steps": 1875, "loss": 0.5736, "lr": 7.976815263412963e-06, "epoch": 2.616, "percentage": 87.2, "elapsed_time": "0:34:11", "remaining_time": "0:05:01"} +{"current_steps": 1640, "total_steps": 1875, "loss": 0.5199, "lr": 7.652171914231776e-06, "epoch": 2.624, "percentage": 87.47, "elapsed_time": "0:34:18", "remaining_time": "0:04:54"} +{"current_steps": 1645, "total_steps": 1875, "loss": 0.5753, "lr": 7.3340098535827905e-06, "epoch": 2.632, "percentage": 87.73, "elapsed_time": "0:34:24", "remaining_time": "0:04:48"} +{"current_steps": 1650, "total_steps": 1875, "loss": 0.5424, "lr": 7.022351411174866e-06, "epoch": 2.64, "percentage": 88.0, "elapsed_time": "0:34:29", "remaining_time": "0:04:42"} +{"current_steps": 1655, "total_steps": 1875, "loss": 0.5555, "lr": 6.717218460270536e-06, "epoch": 2.648, "percentage": 88.27, "elapsed_time": "0:34:37", "remaining_time": "0:04:36"} +{"current_steps": 1660, "total_steps": 1875, "loss": 0.4936, "lr": 6.418632416150927e-06, "epoch": 2.656, "percentage": 88.53, "elapsed_time": "0:34:44", "remaining_time": "0:04:29"} +{"current_steps": 1665, "total_steps": 1875, "loss": 0.6291, "lr": 6.126614234612593e-06, "epoch": 2.664, "percentage": 88.8, "elapsed_time": "0:34:50", "remaining_time": "0:04:23"} +{"current_steps": 1670, "total_steps": 1875, "loss": 0.5197, "lr": 5.8411844104969916e-06, "epoch": 2.672, "percentage": 89.07, "elapsed_time": "0:34:55", "remaining_time": "0:04:17"} +{"current_steps": 1675, "total_steps": 1875, "loss": 0.5398, "lr": 5.562362976251901e-06, "epoch": 2.68, "percentage": 89.33, "elapsed_time": "0:35:02", "remaining_time": "0:04:11"} +{"current_steps": 1680, "total_steps": 1875, "loss": 0.6059, "lr": 5.290169500525577e-06, "epoch": 2.6879999999999997, "percentage": 89.6, "elapsed_time": "0:35:08", "remaining_time": "0:04:04"} +{"current_steps": 1685, "total_steps": 1875, "loss": 0.531, "lr": 5.024623086793323e-06, "epoch": 2.6959999999999997, "percentage": 89.87, "elapsed_time": "0:35:14", "remaining_time": "0:03:58"} +{"current_steps": 1690, "total_steps": 1875, "loss": 0.6054, "lr": 4.765742372016735e-06, "epoch": 2.7039999999999997, "percentage": 90.13, "elapsed_time": "0:35:19", "remaining_time": "0:03:51"} +{"current_steps": 1695, "total_steps": 1875, "loss": 0.5173, "lr": 4.513545525335705e-06, "epoch": 2.7119999999999997, "percentage": 90.4, "elapsed_time": "0:35:25", "remaining_time": "0:03:45"} +{"current_steps": 1700, "total_steps": 1875, "loss": 0.4944, "lr": 4.268050246793276e-06, "epoch": 2.7199999999999998, "percentage": 90.67, "elapsed_time": "0:35:31", "remaining_time": "0:03:39"} +{"current_steps": 1705, "total_steps": 1875, "loss": 0.5183, "lr": 4.029273766093333e-06, "epoch": 2.7279999999999998, "percentage": 90.93, "elapsed_time": "0:35:38", "remaining_time": "0:03:33"} +{"current_steps": 1710, "total_steps": 1875, "loss": 0.668, "lr": 3.797232841391407e-06, "epoch": 2.7359999999999998, "percentage": 91.2, "elapsed_time": "0:35:44", "remaining_time": "0:03:26"} +{"current_steps": 1715, "total_steps": 1875, "loss": 0.5068, "lr": 3.5719437581185454e-06, "epoch": 2.7439999999999998, "percentage": 91.47, "elapsed_time": "0:35:50", "remaining_time": "0:03:20"} +{"current_steps": 1720, "total_steps": 1875, "loss": 0.5823, "lr": 3.3534223278382405e-06, "epoch": 2.752, "percentage": 91.73, "elapsed_time": "0:35:57", "remaining_time": "0:03:14"} +{"current_steps": 1725, "total_steps": 1875, "loss": 0.6111, "lr": 3.1416838871368924e-06, "epoch": 2.76, "percentage": 92.0, "elapsed_time": "0:36:02", "remaining_time": "0:03:08"} +{"current_steps": 1730, "total_steps": 1875, "loss": 0.5231, "lr": 2.936743296547273e-06, "epoch": 2.768, "percentage": 92.27, "elapsed_time": "0:36:07", "remaining_time": "0:03:01"} +{"current_steps": 1735, "total_steps": 1875, "loss": 0.5236, "lr": 2.738614939505646e-06, "epoch": 2.776, "percentage": 92.53, "elapsed_time": "0:36:14", "remaining_time": "0:02:55"} +{"current_steps": 1740, "total_steps": 1875, "loss": 0.5657, "lr": 2.5473127213422763e-06, "epoch": 2.784, "percentage": 92.8, "elapsed_time": "0:36:23", "remaining_time": "0:02:49"} +{"current_steps": 1745, "total_steps": 1875, "loss": 0.5518, "lr": 2.3628500683055222e-06, "epoch": 2.792, "percentage": 93.07, "elapsed_time": "0:36:28", "remaining_time": "0:02:43"} +{"current_steps": 1750, "total_steps": 1875, "loss": 0.4908, "lr": 2.1852399266194314e-06, "epoch": 2.8, "percentage": 93.33, "elapsed_time": "0:36:34", "remaining_time": "0:02:36"} +{"current_steps": 1755, "total_steps": 1875, "loss": 0.5459, "lr": 2.014494761575314e-06, "epoch": 2.808, "percentage": 93.6, "elapsed_time": "0:36:43", "remaining_time": "0:02:30"} +{"current_steps": 1760, "total_steps": 1875, "loss": 0.5208, "lr": 1.8506265566567094e-06, "epoch": 2.816, "percentage": 93.87, "elapsed_time": "0:36:50", "remaining_time": "0:02:24"} +{"current_steps": 1765, "total_steps": 1875, "loss": 0.5824, "lr": 1.6936468126984572e-06, "epoch": 2.824, "percentage": 94.13, "elapsed_time": "0:36:56", "remaining_time": "0:02:18"} +{"current_steps": 1770, "total_steps": 1875, "loss": 0.5512, "lr": 1.543566547079467e-06, "epoch": 2.832, "percentage": 94.4, "elapsed_time": "0:37:05", "remaining_time": "0:02:12"} +{"current_steps": 1775, "total_steps": 1875, "loss": 0.6327, "lr": 1.400396292949513e-06, "epoch": 2.84, "percentage": 94.67, "elapsed_time": "0:37:12", "remaining_time": "0:02:05"} +{"current_steps": 1780, "total_steps": 1875, "loss": 0.5292, "lr": 1.26414609848996e-06, "epoch": 2.848, "percentage": 94.93, "elapsed_time": "0:37:20", "remaining_time": "0:01:59"} +{"current_steps": 1785, "total_steps": 1875, "loss": 0.5692, "lr": 1.134825526208605e-06, "epoch": 2.856, "percentage": 95.2, "elapsed_time": "0:37:26", "remaining_time": "0:01:53"} +{"current_steps": 1790, "total_steps": 1875, "loss": 0.5532, "lr": 1.0124436522684243e-06, "epoch": 2.864, "percentage": 95.47, "elapsed_time": "0:37:33", "remaining_time": "0:01:47"} +{"current_steps": 1795, "total_steps": 1875, "loss": 0.5314, "lr": 8.970090658507291e-07, "epoch": 2.872, "percentage": 95.73, "elapsed_time": "0:37:38", "remaining_time": "0:01:40"} +{"current_steps": 1800, "total_steps": 1875, "loss": 0.524, "lr": 7.885298685522235e-07, "epoch": 2.88, "percentage": 96.0, "elapsed_time": "0:37:45", "remaining_time": "0:01:34"} +{"current_steps": 1805, "total_steps": 1875, "loss": 0.5227, "lr": 6.870136738164612e-07, "epoch": 2.888, "percentage": 96.27, "elapsed_time": "0:37:52", "remaining_time": "0:01:28"} +{"current_steps": 1810, "total_steps": 1875, "loss": 0.5993, "lr": 5.924676063995382e-07, "epoch": 2.896, "percentage": 96.53, "elapsed_time": "0:37:58", "remaining_time": "0:01:21"} +{"current_steps": 1815, "total_steps": 1875, "loss": 0.5618, "lr": 5.048983018699827e-07, "epoch": 2.904, "percentage": 96.8, "elapsed_time": "0:38:06", "remaining_time": "0:01:15"} +{"current_steps": 1820, "total_steps": 1875, "loss": 0.5504, "lr": 4.2431190614309335e-07, "epoch": 2.912, "percentage": 97.07, "elapsed_time": "0:38:12", "remaining_time": "0:01:09"} +{"current_steps": 1825, "total_steps": 1875, "loss": 0.5147, "lr": 3.50714075049563e-07, "epoch": 2.92, "percentage": 97.33, "elapsed_time": "0:38:19", "remaining_time": "0:01:02"} +{"current_steps": 1830, "total_steps": 1875, "loss": 0.5564, "lr": 2.841099739386066e-07, "epoch": 2.928, "percentage": 97.6, "elapsed_time": "0:38:26", "remaining_time": "0:00:56"} +{"current_steps": 1835, "total_steps": 1875, "loss": 0.5188, "lr": 2.2450427731534053e-07, "epoch": 2.936, "percentage": 97.87, "elapsed_time": "0:38:34", "remaining_time": "0:00:50"} +{"current_steps": 1840, "total_steps": 1875, "loss": 0.5438, "lr": 1.7190116851280026e-07, "epoch": 2.944, "percentage": 98.13, "elapsed_time": "0:38:39", "remaining_time": "0:00:44"} +{"current_steps": 1845, "total_steps": 1875, "loss": 0.4962, "lr": 1.2630433939825327e-07, "epoch": 2.952, "percentage": 98.4, "elapsed_time": "0:38:45", "remaining_time": "0:00:37"} +{"current_steps": 1850, "total_steps": 1875, "loss": 0.5021, "lr": 8.771699011416168e-08, "epoch": 2.96, "percentage": 98.67, "elapsed_time": "0:38:52", "remaining_time": "0:00:31"} +{"current_steps": 1855, "total_steps": 1875, "loss": 0.5277, "lr": 5.6141828853573106e-08, "epoch": 2.968, "percentage": 98.93, "elapsed_time": "0:38:58", "remaining_time": "0:00:25"} +{"current_steps": 1860, "total_steps": 1875, "loss": 0.5764, "lr": 3.1581071670006015e-08, "epoch": 2.976, "percentage": 99.2, "elapsed_time": "0:39:01", "remaining_time": "0:00:18"} +{"current_steps": 1865, "total_steps": 1875, "loss": 0.5408, "lr": 1.4036442321962995e-08, "epoch": 2.984, "percentage": 99.47, "elapsed_time": "0:39:08", "remaining_time": "0:00:12"} +{"current_steps": 1870, "total_steps": 1875, "loss": 0.5014, "lr": 3.509172151938689e-09, "epoch": 2.992, "percentage": 99.73, "elapsed_time": "0:39:17", "remaining_time": "0:00:06"} +{"current_steps": 1875, "total_steps": 1875, "loss": 0.6182, "lr": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:39:23", "remaining_time": "0:00:00"} +{"current_steps": 1875, "total_steps": 1875, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:39:25", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5b8b227e539a162d3769a6a052c3a5adfa6820f9 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,2667 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1875, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.8485889434814453, + "learning_rate": 0.0001999964908278481, + "loss": 1.2049, + "step": 5 + }, + { + "epoch": 0.016, + "grad_norm": 0.47789862751960754, + "learning_rate": 0.00019998596355767805, + "loss": 0.9333, + "step": 10 + }, + { + "epoch": 0.024, + "grad_norm": 1.017558217048645, + "learning_rate": 0.00019996841892833, + "loss": 0.8671, + "step": 15 + }, + { + "epoch": 0.032, + "grad_norm": 0.6610977053642273, + "learning_rate": 0.00019994385817114646, + "loss": 0.7979, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 0.6075429320335388, + "learning_rate": 0.00019991228300988585, + "loss": 0.7662, + "step": 25 + }, + { + "epoch": 0.048, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.00019987369566060176, + "loss": 0.7929, + "step": 30 + }, + { + "epoch": 0.056, + "grad_norm": 0.6968618035316467, + "learning_rate": 0.00019982809883148722, + "loss": 0.7683, + "step": 35 + }, + { + "epoch": 0.064, + "grad_norm": 0.4889592230319977, + "learning_rate": 0.00019977549572268468, + "loss": 0.8667, + "step": 40 + }, + { + "epoch": 0.072, + "grad_norm": 0.6651108264923096, + "learning_rate": 0.0001997158900260614, + "loss": 0.8446, + "step": 45 + }, + { + "epoch": 0.08, + "grad_norm": 0.5898510217666626, + "learning_rate": 0.00019964928592495045, + "loss": 0.9051, + "step": 50 + }, + { + "epoch": 0.088, + "grad_norm": 0.4398016035556793, + "learning_rate": 0.00019957568809385694, + "loss": 0.7235, + "step": 55 + }, + { + "epoch": 0.096, + "grad_norm": 0.6901968121528625, + "learning_rate": 0.00019949510169813003, + "loss": 0.8169, + "step": 60 + }, + { + "epoch": 0.104, + "grad_norm": 0.6267213225364685, + "learning_rate": 0.00019940753239360047, + "loss": 0.8266, + "step": 65 + }, + { + "epoch": 0.112, + "grad_norm": 0.48524895310401917, + "learning_rate": 0.00019931298632618356, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.12, + "grad_norm": 0.5294132232666016, + "learning_rate": 0.0001992114701314478, + "loss": 0.7759, + "step": 75 + }, + { + "epoch": 0.128, + "grad_norm": 0.48957982659339905, + "learning_rate": 0.0001991029909341493, + "loss": 0.7797, + "step": 80 + }, + { + "epoch": 0.136, + "grad_norm": 0.645412802696228, + "learning_rate": 0.00019898755634773158, + "loss": 0.7437, + "step": 85 + }, + { + "epoch": 0.144, + "grad_norm": 0.43297675251960754, + "learning_rate": 0.0001988651744737914, + "loss": 0.8043, + "step": 90 + }, + { + "epoch": 0.152, + "grad_norm": 0.5513920783996582, + "learning_rate": 0.00019873585390151003, + "loss": 0.7701, + "step": 95 + }, + { + "epoch": 0.16, + "grad_norm": 0.8462435007095337, + "learning_rate": 0.0001985996037070505, + "loss": 0.709, + "step": 100 + }, + { + "epoch": 0.168, + "grad_norm": 0.6892585158348083, + "learning_rate": 0.00019845643345292054, + "loss": 0.7377, + "step": 105 + }, + { + "epoch": 0.176, + "grad_norm": 0.4617864191532135, + "learning_rate": 0.00019830635318730154, + "loss": 0.8352, + "step": 110 + }, + { + "epoch": 0.184, + "grad_norm": 0.6300354599952698, + "learning_rate": 0.0001981493734433433, + "loss": 0.7738, + "step": 115 + }, + { + "epoch": 0.192, + "grad_norm": 0.8086859583854675, + "learning_rate": 0.0001979855052384247, + "loss": 0.8067, + "step": 120 + }, + { + "epoch": 0.2, + "grad_norm": 0.6272985935211182, + "learning_rate": 0.00019781476007338058, + "loss": 0.7456, + "step": 125 + }, + { + "epoch": 0.208, + "grad_norm": 0.44750839471817017, + "learning_rate": 0.00019763714993169452, + "loss": 0.758, + "step": 130 + }, + { + "epoch": 0.216, + "grad_norm": 0.5053977370262146, + "learning_rate": 0.00019745268727865774, + "loss": 0.7895, + "step": 135 + }, + { + "epoch": 0.224, + "grad_norm": 0.41920769214630127, + "learning_rate": 0.00019726138506049438, + "loss": 0.7302, + "step": 140 + }, + { + "epoch": 0.232, + "grad_norm": 0.38280290365219116, + "learning_rate": 0.00019706325670345275, + "loss": 0.8152, + "step": 145 + }, + { + "epoch": 0.24, + "grad_norm": 0.554710865020752, + "learning_rate": 0.0001968583161128631, + "loss": 0.8461, + "step": 150 + }, + { + "epoch": 0.248, + "grad_norm": 0.5612509250640869, + "learning_rate": 0.00019664657767216176, + "loss": 0.7787, + "step": 155 + }, + { + "epoch": 0.256, + "grad_norm": 0.610614538192749, + "learning_rate": 0.00019642805624188147, + "loss": 0.7574, + "step": 160 + }, + { + "epoch": 0.264, + "grad_norm": 0.679517924785614, + "learning_rate": 0.0001962027671586086, + "loss": 0.8487, + "step": 165 + }, + { + "epoch": 0.272, + "grad_norm": 0.6685434579849243, + "learning_rate": 0.00019597072623390668, + "loss": 0.6611, + "step": 170 + }, + { + "epoch": 0.28, + "grad_norm": 0.480293869972229, + "learning_rate": 0.00019573194975320673, + "loss": 0.7802, + "step": 175 + }, + { + "epoch": 0.288, + "grad_norm": 0.7727369070053101, + "learning_rate": 0.00019548645447466431, + "loss": 0.6727, + "step": 180 + }, + { + "epoch": 0.296, + "grad_norm": 0.6371043920516968, + "learning_rate": 0.00019523425762798329, + "loss": 0.7502, + "step": 185 + }, + { + "epoch": 0.304, + "grad_norm": 0.6399966478347778, + "learning_rate": 0.00019497537691320668, + "loss": 0.8401, + "step": 190 + }, + { + "epoch": 0.312, + "grad_norm": 0.7263137698173523, + "learning_rate": 0.00019470983049947444, + "loss": 0.7494, + "step": 195 + }, + { + "epoch": 0.32, + "grad_norm": 0.402416467666626, + "learning_rate": 0.00019443763702374812, + "loss": 0.7842, + "step": 200 + }, + { + "epoch": 0.328, + "grad_norm": 0.6639626026153564, + "learning_rate": 0.00019415881558950302, + "loss": 0.8082, + "step": 205 + }, + { + "epoch": 0.336, + "grad_norm": 0.5801042914390564, + "learning_rate": 0.00019387338576538744, + "loss": 0.7883, + "step": 210 + }, + { + "epoch": 0.344, + "grad_norm": 0.5533607006072998, + "learning_rate": 0.00019358136758384912, + "loss": 0.7356, + "step": 215 + }, + { + "epoch": 0.352, + "grad_norm": 0.6019654273986816, + "learning_rate": 0.00019328278153972947, + "loss": 0.7891, + "step": 220 + }, + { + "epoch": 0.36, + "grad_norm": 0.5344104170799255, + "learning_rate": 0.00019297764858882514, + "loss": 0.7671, + "step": 225 + }, + { + "epoch": 0.368, + "grad_norm": 0.5494843125343323, + "learning_rate": 0.0001926659901464172, + "loss": 0.6608, + "step": 230 + }, + { + "epoch": 0.376, + "grad_norm": 0.465420126914978, + "learning_rate": 0.00019234782808576824, + "loss": 0.647, + "step": 235 + }, + { + "epoch": 0.384, + "grad_norm": 0.5202775001525879, + "learning_rate": 0.00019202318473658705, + "loss": 0.729, + "step": 240 + }, + { + "epoch": 0.392, + "grad_norm": 0.5757818222045898, + "learning_rate": 0.00019169208288346166, + "loss": 0.6713, + "step": 245 + }, + { + "epoch": 0.4, + "grad_norm": 0.46555572748184204, + "learning_rate": 0.0001913545457642601, + "loss": 0.7049, + "step": 250 + }, + { + "epoch": 0.408, + "grad_norm": 0.5101790428161621, + "learning_rate": 0.00019101059706849957, + "loss": 0.7419, + "step": 255 + }, + { + "epoch": 0.416, + "grad_norm": 0.6083744764328003, + "learning_rate": 0.00019066026093568378, + "loss": 0.7148, + "step": 260 + }, + { + "epoch": 0.424, + "grad_norm": 0.4719640612602234, + "learning_rate": 0.00019030356195360874, + "loss": 0.7493, + "step": 265 + }, + { + "epoch": 0.432, + "grad_norm": 0.7365225553512573, + "learning_rate": 0.0001899405251566371, + "loss": 0.7652, + "step": 270 + }, + { + "epoch": 0.44, + "grad_norm": 0.4452705383300781, + "learning_rate": 0.0001895711760239413, + "loss": 0.7438, + "step": 275 + }, + { + "epoch": 0.448, + "grad_norm": 0.6071786284446716, + "learning_rate": 0.0001891955404777151, + "loss": 0.7683, + "step": 280 + }, + { + "epoch": 0.456, + "grad_norm": 0.5774498581886292, + "learning_rate": 0.00018881364488135448, + "loss": 0.8115, + "step": 285 + }, + { + "epoch": 0.464, + "grad_norm": 0.6134682893753052, + "learning_rate": 0.00018842551603760724, + "loss": 0.8335, + "step": 290 + }, + { + "epoch": 0.472, + "grad_norm": 0.4869893193244934, + "learning_rate": 0.00018803118118669202, + "loss": 0.6933, + "step": 295 + }, + { + "epoch": 0.48, + "grad_norm": 0.6457111239433289, + "learning_rate": 0.00018763066800438636, + "loss": 0.7515, + "step": 300 + }, + { + "epoch": 0.488, + "grad_norm": 0.59674471616745, + "learning_rate": 0.0001872240046000844, + "loss": 0.6931, + "step": 305 + }, + { + "epoch": 0.496, + "grad_norm": 0.44608160853385925, + "learning_rate": 0.00018681121951482393, + "loss": 0.782, + "step": 310 + }, + { + "epoch": 0.504, + "grad_norm": 0.5934664607048035, + "learning_rate": 0.00018639234171928353, + "loss": 0.7361, + "step": 315 + }, + { + "epoch": 0.512, + "grad_norm": 0.49716323614120483, + "learning_rate": 0.0001859674006117491, + "loss": 0.7443, + "step": 320 + }, + { + "epoch": 0.52, + "grad_norm": 0.47995495796203613, + "learning_rate": 0.00018553642601605068, + "loss": 0.7221, + "step": 325 + }, + { + "epoch": 0.528, + "grad_norm": 0.5177399516105652, + "learning_rate": 0.00018509944817946922, + "loss": 0.7622, + "step": 330 + }, + { + "epoch": 0.536, + "grad_norm": 0.6638798713684082, + "learning_rate": 0.0001846564977706138, + "loss": 0.8556, + "step": 335 + }, + { + "epoch": 0.544, + "grad_norm": 0.5056771636009216, + "learning_rate": 0.00018420760587726923, + "loss": 0.7814, + "step": 340 + }, + { + "epoch": 0.552, + "grad_norm": 0.44543707370758057, + "learning_rate": 0.0001837528040042142, + "loss": 0.722, + "step": 345 + }, + { + "epoch": 0.56, + "grad_norm": 0.6765120625495911, + "learning_rate": 0.00018329212407100994, + "loss": 0.7903, + "step": 350 + }, + { + "epoch": 0.568, + "grad_norm": 0.49232372641563416, + "learning_rate": 0.00018282559840976042, + "loss": 0.6996, + "step": 355 + }, + { + "epoch": 0.576, + "grad_norm": 0.47392791509628296, + "learning_rate": 0.00018235325976284275, + "loss": 0.773, + "step": 360 + }, + { + "epoch": 0.584, + "grad_norm": 0.5056615471839905, + "learning_rate": 0.00018187514128060946, + "loss": 0.728, + "step": 365 + }, + { + "epoch": 0.592, + "grad_norm": 0.5857616662979126, + "learning_rate": 0.00018139127651906184, + "loss": 0.7659, + "step": 370 + }, + { + "epoch": 0.6, + "grad_norm": 0.5966864228248596, + "learning_rate": 0.00018090169943749476, + "loss": 0.7039, + "step": 375 + }, + { + "epoch": 0.608, + "grad_norm": 0.4524347484111786, + "learning_rate": 0.00018040644439611348, + "loss": 0.7125, + "step": 380 + }, + { + "epoch": 0.616, + "grad_norm": 0.5570976138114929, + "learning_rate": 0.00017990554615362198, + "loss": 0.698, + "step": 385 + }, + { + "epoch": 0.624, + "grad_norm": 0.6045777201652527, + "learning_rate": 0.00017939903986478355, + "loss": 0.8255, + "step": 390 + }, + { + "epoch": 0.632, + "grad_norm": 0.6149687767028809, + "learning_rate": 0.00017888696107795342, + "loss": 0.6616, + "step": 395 + }, + { + "epoch": 0.64, + "grad_norm": 0.4873579144477844, + "learning_rate": 0.000178369345732584, + "loss": 0.7452, + "step": 400 + }, + { + "epoch": 0.648, + "grad_norm": 0.5569061636924744, + "learning_rate": 0.00017784623015670238, + "loss": 0.7652, + "step": 405 + }, + { + "epoch": 0.656, + "grad_norm": 0.5825181603431702, + "learning_rate": 0.00017731765106436073, + "loss": 0.7793, + "step": 410 + }, + { + "epoch": 0.664, + "grad_norm": 0.4047383666038513, + "learning_rate": 0.00017678364555305978, + "loss": 0.6875, + "step": 415 + }, + { + "epoch": 0.672, + "grad_norm": 0.5080836415290833, + "learning_rate": 0.0001762442511011448, + "loss": 0.7465, + "step": 420 + }, + { + "epoch": 0.68, + "grad_norm": 0.5825940370559692, + "learning_rate": 0.00017569950556517566, + "loss": 0.7205, + "step": 425 + }, + { + "epoch": 0.688, + "grad_norm": 0.476992666721344, + "learning_rate": 0.00017514944717726962, + "loss": 0.6589, + "step": 430 + }, + { + "epoch": 0.696, + "grad_norm": 0.7424727082252502, + "learning_rate": 0.00017459411454241822, + "loss": 0.7035, + "step": 435 + }, + { + "epoch": 0.704, + "grad_norm": 0.6544787287712097, + "learning_rate": 0.00017403354663577783, + "loss": 0.787, + "step": 440 + }, + { + "epoch": 0.712, + "grad_norm": 0.49425187706947327, + "learning_rate": 0.00017346778279993415, + "loss": 0.7515, + "step": 445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5473236441612244, + "learning_rate": 0.00017289686274214118, + "loss": 0.7199, + "step": 450 + }, + { + "epoch": 0.728, + "grad_norm": 0.6773544549942017, + "learning_rate": 0.00017232082653153422, + "loss": 0.8037, + "step": 455 + }, + { + "epoch": 0.736, + "grad_norm": 0.6355096101760864, + "learning_rate": 0.00017173971459631787, + "loss": 0.7502, + "step": 460 + }, + { + "epoch": 0.744, + "grad_norm": 0.47867000102996826, + "learning_rate": 0.00017115356772092857, + "loss": 0.7446, + "step": 465 + }, + { + "epoch": 0.752, + "grad_norm": 0.5135357975959778, + "learning_rate": 0.0001705624270431721, + "loss": 0.6507, + "step": 470 + }, + { + "epoch": 0.76, + "grad_norm": 0.48866042494773865, + "learning_rate": 0.00016996633405133655, + "loss": 0.7164, + "step": 475 + }, + { + "epoch": 0.768, + "grad_norm": 0.5892354249954224, + "learning_rate": 0.0001693653305812805, + "loss": 0.7621, + "step": 480 + }, + { + "epoch": 0.776, + "grad_norm": 0.6633970141410828, + "learning_rate": 0.00016875945881349676, + "loss": 0.7623, + "step": 485 + }, + { + "epoch": 0.784, + "grad_norm": 0.6444060802459717, + "learning_rate": 0.000168148761270152, + "loss": 0.6606, + "step": 490 + }, + { + "epoch": 0.792, + "grad_norm": 0.7012648582458496, + "learning_rate": 0.00016753328081210245, + "loss": 0.6941, + "step": 495 + }, + { + "epoch": 0.8, + "grad_norm": 0.7064160704612732, + "learning_rate": 0.00016691306063588583, + "loss": 0.6841, + "step": 500 + }, + { + "epoch": 0.808, + "grad_norm": 0.7241398096084595, + "learning_rate": 0.00016628814427068953, + "loss": 0.6996, + "step": 505 + }, + { + "epoch": 0.816, + "grad_norm": 0.7807374596595764, + "learning_rate": 0.00016565857557529566, + "loss": 0.7542, + "step": 510 + }, + { + "epoch": 0.824, + "grad_norm": 0.763768196105957, + "learning_rate": 0.00016502439873500289, + "loss": 0.7175, + "step": 515 + }, + { + "epoch": 0.832, + "grad_norm": 0.6105090379714966, + "learning_rate": 0.0001643856582585254, + "loss": 0.7565, + "step": 520 + }, + { + "epoch": 0.84, + "grad_norm": 0.5686540603637695, + "learning_rate": 0.000163742398974869, + "loss": 0.7339, + "step": 525 + }, + { + "epoch": 0.848, + "grad_norm": 0.5341500043869019, + "learning_rate": 0.00016309466603018496, + "loss": 0.569, + "step": 530 + }, + { + "epoch": 0.856, + "grad_norm": 0.7274748682975769, + "learning_rate": 0.00016244250488460158, + "loss": 0.7556, + "step": 535 + }, + { + "epoch": 0.864, + "grad_norm": 0.7321165204048157, + "learning_rate": 0.00016178596130903344, + "loss": 0.7084, + "step": 540 + }, + { + "epoch": 0.872, + "grad_norm": 0.5086159110069275, + "learning_rate": 0.00016112508138196917, + "loss": 0.6935, + "step": 545 + }, + { + "epoch": 0.88, + "grad_norm": 0.4714389443397522, + "learning_rate": 0.0001604599114862375, + "loss": 0.7076, + "step": 550 + }, + { + "epoch": 0.888, + "grad_norm": 0.5031452178955078, + "learning_rate": 0.0001597904983057519, + "loss": 0.7151, + "step": 555 + }, + { + "epoch": 0.896, + "grad_norm": 0.7745943665504456, + "learning_rate": 0.0001591168888222342, + "loss": 0.7001, + "step": 560 + }, + { + "epoch": 0.904, + "grad_norm": 0.6076303124427795, + "learning_rate": 0.00015843913031191723, + "loss": 0.7285, + "step": 565 + }, + { + "epoch": 0.912, + "grad_norm": 0.7456529140472412, + "learning_rate": 0.00015775727034222675, + "loss": 0.8041, + "step": 570 + }, + { + "epoch": 0.92, + "grad_norm": 0.5760998725891113, + "learning_rate": 0.0001570713567684432, + "loss": 0.7353, + "step": 575 + }, + { + "epoch": 0.928, + "grad_norm": 0.7057327032089233, + "learning_rate": 0.00015638143773034267, + "loss": 0.7792, + "step": 580 + }, + { + "epoch": 0.936, + "grad_norm": 0.7615967392921448, + "learning_rate": 0.00015568756164881882, + "loss": 1.0121, + "step": 585 + }, + { + "epoch": 0.944, + "grad_norm": 0.6304950714111328, + "learning_rate": 0.000154989777222484, + "loss": 0.7727, + "step": 590 + }, + { + "epoch": 0.952, + "grad_norm": 0.6852543950080872, + "learning_rate": 0.00015428813342425177, + "loss": 0.741, + "step": 595 + }, + { + "epoch": 0.96, + "grad_norm": 0.6379660964012146, + "learning_rate": 0.00015358267949789966, + "loss": 0.6919, + "step": 600 + }, + { + "epoch": 0.968, + "grad_norm": 0.5846463441848755, + "learning_rate": 0.00015287346495461315, + "loss": 0.7163, + "step": 605 + }, + { + "epoch": 0.976, + "grad_norm": 0.5999557971954346, + "learning_rate": 0.0001521605395695108, + "loss": 0.8152, + "step": 610 + }, + { + "epoch": 0.984, + "grad_norm": 0.5806307196617126, + "learning_rate": 0.00015144395337815064, + "loss": 0.6709, + "step": 615 + }, + { + "epoch": 0.992, + "grad_norm": 0.6559942960739136, + "learning_rate": 0.00015072375667301893, + "loss": 0.6527, + "step": 620 + }, + { + "epoch": 1.0, + "grad_norm": 0.6287715435028076, + "learning_rate": 0.00015000000000000001, + "loss": 0.8194, + "step": 625 + }, + { + "epoch": 1.008, + "grad_norm": 0.616222620010376, + "learning_rate": 0.00014927273415482915, + "loss": 0.6627, + "step": 630 + }, + { + "epoch": 1.016, + "grad_norm": 0.4750412106513977, + "learning_rate": 0.0001485420101795274, + "loss": 0.6366, + "step": 635 + }, + { + "epoch": 1.024, + "grad_norm": 0.5122964978218079, + "learning_rate": 0.00014780787935881923, + "loss": 0.6717, + "step": 640 + }, + { + "epoch": 1.032, + "grad_norm": 0.7382633090019226, + "learning_rate": 0.0001470703932165333, + "loss": 0.6483, + "step": 645 + }, + { + "epoch": 1.04, + "grad_norm": 0.6540554761886597, + "learning_rate": 0.00014632960351198618, + "loss": 0.6151, + "step": 650 + }, + { + "epoch": 1.048, + "grad_norm": 0.4776591956615448, + "learning_rate": 0.00014558556223635003, + "loss": 0.6707, + "step": 655 + }, + { + "epoch": 1.056, + "grad_norm": 0.8012662529945374, + "learning_rate": 0.00014483832160900326, + "loss": 0.6125, + "step": 660 + }, + { + "epoch": 1.064, + "grad_norm": 0.6735953092575073, + "learning_rate": 0.00014408793407386588, + "loss": 0.6206, + "step": 665 + }, + { + "epoch": 1.072, + "grad_norm": 0.5640230774879456, + "learning_rate": 0.00014333445229571873, + "loss": 0.6161, + "step": 670 + }, + { + "epoch": 1.08, + "grad_norm": 0.5928654074668884, + "learning_rate": 0.00014257792915650728, + "loss": 0.6583, + "step": 675 + }, + { + "epoch": 1.088, + "grad_norm": 0.7347397208213806, + "learning_rate": 0.00014181841775163013, + "loss": 0.6222, + "step": 680 + }, + { + "epoch": 1.096, + "grad_norm": 0.593773365020752, + "learning_rate": 0.0001410559713862128, + "loss": 0.716, + "step": 685 + }, + { + "epoch": 1.104, + "grad_norm": 0.6244611144065857, + "learning_rate": 0.00014029064357136628, + "loss": 0.6198, + "step": 690 + }, + { + "epoch": 1.112, + "grad_norm": 0.5083370804786682, + "learning_rate": 0.00013952248802043165, + "loss": 0.6389, + "step": 695 + }, + { + "epoch": 1.12, + "grad_norm": 0.5241413116455078, + "learning_rate": 0.0001387515586452103, + "loss": 0.6842, + "step": 700 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.524029016494751, + "learning_rate": 0.00013797790955218014, + "loss": 0.6071, + "step": 705 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5097878575325012, + "learning_rate": 0.00013720159503869815, + "loss": 0.5915, + "step": 710 + }, + { + "epoch": 1.144, + "grad_norm": 0.5782963037490845, + "learning_rate": 0.00013642266958918984, + "loss": 0.6794, + "step": 715 + }, + { + "epoch": 1.152, + "grad_norm": 0.6088266372680664, + "learning_rate": 0.00013564118787132506, + "loss": 0.6773, + "step": 720 + }, + { + "epoch": 1.16, + "grad_norm": 0.7768995761871338, + "learning_rate": 0.00013485720473218154, + "loss": 0.668, + "step": 725 + }, + { + "epoch": 1.168, + "grad_norm": 0.6645551919937134, + "learning_rate": 0.0001340707751943952, + "loss": 0.6997, + "step": 730 + }, + { + "epoch": 1.176, + "grad_norm": 0.9228842258453369, + "learning_rate": 0.00013328195445229868, + "loss": 0.831, + "step": 735 + }, + { + "epoch": 1.184, + "grad_norm": 0.7556049823760986, + "learning_rate": 0.00013249079786804765, + "loss": 0.6378, + "step": 740 + }, + { + "epoch": 1.192, + "grad_norm": 0.832775354385376, + "learning_rate": 0.0001316973609677352, + "loss": 0.6547, + "step": 745 + }, + { + "epoch": 1.2, + "grad_norm": 0.7329304814338684, + "learning_rate": 0.00013090169943749476, + "loss": 0.5808, + "step": 750 + }, + { + "epoch": 1.208, + "grad_norm": 0.7193475961685181, + "learning_rate": 0.00013010386911959206, + "loss": 0.5582, + "step": 755 + }, + { + "epoch": 1.216, + "grad_norm": 0.6274734735488892, + "learning_rate": 0.00012930392600850573, + "loss": 0.5801, + "step": 760 + }, + { + "epoch": 1.224, + "grad_norm": 0.6485865712165833, + "learning_rate": 0.0001285019262469976, + "loss": 0.65, + "step": 765 + }, + { + "epoch": 1.232, + "grad_norm": 0.7164427042007446, + "learning_rate": 0.00012769792612217224, + "loss": 0.6627, + "step": 770 + }, + { + "epoch": 1.24, + "grad_norm": 0.600775957107544, + "learning_rate": 0.00012689198206152657, + "loss": 0.5603, + "step": 775 + }, + { + "epoch": 1.248, + "grad_norm": 0.8377975225448608, + "learning_rate": 0.00012608415062898972, + "loss": 0.6525, + "step": 780 + }, + { + "epoch": 1.256, + "grad_norm": 0.8069924116134644, + "learning_rate": 0.00012527448852095295, + "loss": 0.6731, + "step": 785 + }, + { + "epoch": 1.264, + "grad_norm": 0.6501213908195496, + "learning_rate": 0.00012446305256229073, + "loss": 0.6255, + "step": 790 + }, + { + "epoch": 1.272, + "grad_norm": 0.62812340259552, + "learning_rate": 0.00012364989970237248, + "loss": 0.6585, + "step": 795 + }, + { + "epoch": 1.28, + "grad_norm": 0.5702307820320129, + "learning_rate": 0.00012283508701106557, + "loss": 0.5996, + "step": 800 + }, + { + "epoch": 1.288, + "grad_norm": 0.6311281323432922, + "learning_rate": 0.00012201867167473015, + "loss": 0.6355, + "step": 805 + }, + { + "epoch": 1.296, + "grad_norm": 0.5885419249534607, + "learning_rate": 0.00012120071099220549, + "loss": 0.6615, + "step": 810 + }, + { + "epoch": 1.304, + "grad_norm": 0.5239307284355164, + "learning_rate": 0.0001203812623707885, + "loss": 0.6096, + "step": 815 + }, + { + "epoch": 1.312, + "grad_norm": 0.6101869940757751, + "learning_rate": 0.00011956038332220483, + "loss": 0.5984, + "step": 820 + }, + { + "epoch": 1.32, + "grad_norm": 0.4395413100719452, + "learning_rate": 0.00011873813145857249, + "loss": 0.5569, + "step": 825 + }, + { + "epoch": 1.328, + "grad_norm": 0.8984820246696472, + "learning_rate": 0.00011791456448835825, + "loss": 0.7088, + "step": 830 + }, + { + "epoch": 1.336, + "grad_norm": 0.7709664106369019, + "learning_rate": 0.00011708974021232769, + "loss": 0.6731, + "step": 835 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.6782217025756836, + "learning_rate": 0.00011626371651948838, + "loss": 0.6188, + "step": 840 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.6427358984947205, + "learning_rate": 0.00011543655138302714, + "loss": 0.7004, + "step": 845 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.5902594923973083, + "learning_rate": 0.00011460830285624118, + "loss": 0.5884, + "step": 850 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5935835838317871, + "learning_rate": 0.0001137790290684638, + "loss": 0.5739, + "step": 855 + }, + { + "epoch": 1.376, + "grad_norm": 0.6752728223800659, + "learning_rate": 0.00011294878822098469, + "loss": 0.6435, + "step": 860 + }, + { + "epoch": 1.384, + "grad_norm": 0.7927135825157166, + "learning_rate": 0.00011211763858296507, + "loss": 0.6897, + "step": 865 + }, + { + "epoch": 1.392, + "grad_norm": 0.714499294757843, + "learning_rate": 0.00011128563848734816, + "loss": 0.6641, + "step": 870 + }, + { + "epoch": 1.4, + "grad_norm": 0.7086356282234192, + "learning_rate": 0.00011045284632676536, + "loss": 0.6273, + "step": 875 + }, + { + "epoch": 1.408, + "grad_norm": 0.6125518679618835, + "learning_rate": 0.00010961932054943778, + "loss": 0.6437, + "step": 880 + }, + { + "epoch": 1.416, + "grad_norm": 0.5635287165641785, + "learning_rate": 0.00010878511965507434, + "loss": 0.6345, + "step": 885 + }, + { + "epoch": 1.424, + "grad_norm": 0.47936007380485535, + "learning_rate": 0.00010795030219076599, + "loss": 0.5913, + "step": 890 + }, + { + "epoch": 1.432, + "grad_norm": 0.7142558097839355, + "learning_rate": 0.00010711492674687671, + "loss": 0.6482, + "step": 895 + }, + { + "epoch": 1.44, + "grad_norm": 0.5252729058265686, + "learning_rate": 0.00010627905195293135, + "loss": 0.6165, + "step": 900 + }, + { + "epoch": 1.448, + "grad_norm": 0.896318793296814, + "learning_rate": 0.00010544273647350092, + "loss": 0.634, + "step": 905 + }, + { + "epoch": 1.456, + "grad_norm": 0.6029036045074463, + "learning_rate": 0.00010460603900408523, + "loss": 0.6509, + "step": 910 + }, + { + "epoch": 1.464, + "grad_norm": 0.6835671663284302, + "learning_rate": 0.00010376901826699348, + "loss": 0.6212, + "step": 915 + }, + { + "epoch": 1.472, + "grad_norm": 0.7098750472068787, + "learning_rate": 0.00010293173300722285, + "loss": 0.7305, + "step": 920 + }, + { + "epoch": 1.48, + "grad_norm": 0.675316333770752, + "learning_rate": 0.0001020942419883357, + "loss": 0.6685, + "step": 925 + }, + { + "epoch": 1.488, + "grad_norm": 0.6675406098365784, + "learning_rate": 0.00010125660398833528, + "loss": 0.6214, + "step": 930 + }, + { + "epoch": 1.496, + "grad_norm": 0.6629154682159424, + "learning_rate": 0.0001004188777955404, + "loss": 0.6035, + "step": 935 + }, + { + "epoch": 1.504, + "grad_norm": 0.7732692360877991, + "learning_rate": 9.958112220445963e-05, + "loss": 0.5868, + "step": 940 + }, + { + "epoch": 1.512, + "grad_norm": 0.6238484978675842, + "learning_rate": 9.874339601166473e-05, + "loss": 0.6003, + "step": 945 + }, + { + "epoch": 1.52, + "grad_norm": 0.6622412800788879, + "learning_rate": 9.790575801166432e-05, + "loss": 0.5854, + "step": 950 + }, + { + "epoch": 1.528, + "grad_norm": 0.7598085999488831, + "learning_rate": 9.706826699277718e-05, + "loss": 0.5882, + "step": 955 + }, + { + "epoch": 1.536, + "grad_norm": 0.8870390057563782, + "learning_rate": 9.623098173300654e-05, + "loss": 0.7187, + "step": 960 + }, + { + "epoch": 1.544, + "grad_norm": 0.702232837677002, + "learning_rate": 9.539396099591476e-05, + "loss": 0.6156, + "step": 965 + }, + { + "epoch": 1.552, + "grad_norm": 0.694139301776886, + "learning_rate": 9.455726352649911e-05, + "loss": 0.6488, + "step": 970 + }, + { + "epoch": 1.56, + "grad_norm": 0.5684956312179565, + "learning_rate": 9.372094804706867e-05, + "loss": 0.6601, + "step": 975 + }, + { + "epoch": 1.568, + "grad_norm": 0.6327118277549744, + "learning_rate": 9.288507325312335e-05, + "loss": 0.5968, + "step": 980 + }, + { + "epoch": 1.576, + "grad_norm": 0.5732144117355347, + "learning_rate": 9.204969780923403e-05, + "loss": 0.7034, + "step": 985 + }, + { + "epoch": 1.584, + "grad_norm": 0.7025273442268372, + "learning_rate": 9.121488034492569e-05, + "loss": 0.5973, + "step": 990 + }, + { + "epoch": 1.592, + "grad_norm": 0.9270740151405334, + "learning_rate": 9.038067945056227e-05, + "loss": 0.7877, + "step": 995 + }, + { + "epoch": 1.6, + "grad_norm": 0.6676818132400513, + "learning_rate": 8.954715367323468e-05, + "loss": 0.644, + "step": 1000 + }, + { + "epoch": 1.608, + "grad_norm": 0.6687547564506531, + "learning_rate": 8.871436151265184e-05, + "loss": 0.6678, + "step": 1005 + }, + { + "epoch": 1.616, + "grad_norm": 0.7400322556495667, + "learning_rate": 8.788236141703498e-05, + "loss": 0.6088, + "step": 1010 + }, + { + "epoch": 1.624, + "grad_norm": 0.5504963994026184, + "learning_rate": 8.705121177901532e-05, + "loss": 0.6219, + "step": 1015 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.8088738322257996, + "learning_rate": 8.62209709315362e-05, + "loss": 0.6698, + "step": 1020 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.7380816340446472, + "learning_rate": 8.539169714375885e-05, + "loss": 0.6207, + "step": 1025 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.6346850395202637, + "learning_rate": 8.456344861697289e-05, + "loss": 0.626, + "step": 1030 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.49918702244758606, + "learning_rate": 8.373628348051165e-05, + "loss": 0.6972, + "step": 1035 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.7200607657432556, + "learning_rate": 8.291025978767235e-05, + "loss": 0.6282, + "step": 1040 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.8350688815116882, + "learning_rate": 8.208543551164178e-05, + "loss": 0.6219, + "step": 1045 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5957468748092651, + "learning_rate": 8.126186854142752e-05, + "loss": 0.622, + "step": 1050 + }, + { + "epoch": 1.688, + "grad_norm": 0.7494757175445557, + "learning_rate": 8.04396166777952e-05, + "loss": 0.5801, + "step": 1055 + }, + { + "epoch": 1.696, + "grad_norm": 0.7109113931655884, + "learning_rate": 7.961873762921153e-05, + "loss": 0.598, + "step": 1060 + }, + { + "epoch": 1.704, + "grad_norm": 0.7941219210624695, + "learning_rate": 7.879928900779456e-05, + "loss": 0.699, + "step": 1065 + }, + { + "epoch": 1.712, + "grad_norm": 0.7282152771949768, + "learning_rate": 7.798132832526986e-05, + "loss": 0.5882, + "step": 1070 + }, + { + "epoch": 1.72, + "grad_norm": 0.5923735499382019, + "learning_rate": 7.716491298893442e-05, + "loss": 0.5321, + "step": 1075 + }, + { + "epoch": 1.728, + "grad_norm": 0.5571523308753967, + "learning_rate": 7.635010029762756e-05, + "loss": 0.6647, + "step": 1080 + }, + { + "epoch": 1.736, + "grad_norm": 0.6043043732643127, + "learning_rate": 7.553694743770928e-05, + "loss": 0.628, + "step": 1085 + }, + { + "epoch": 1.744, + "grad_norm": 0.8733720183372498, + "learning_rate": 7.472551147904708e-05, + "loss": 0.6262, + "step": 1090 + }, + { + "epoch": 1.752, + "grad_norm": 0.5561848878860474, + "learning_rate": 7.391584937101033e-05, + "loss": 0.6131, + "step": 1095 + }, + { + "epoch": 1.76, + "grad_norm": 0.6611356735229492, + "learning_rate": 7.310801793847344e-05, + "loss": 0.6494, + "step": 1100 + }, + { + "epoch": 1.768, + "grad_norm": 0.5700333714485168, + "learning_rate": 7.230207387782776e-05, + "loss": 0.5514, + "step": 1105 + }, + { + "epoch": 1.776, + "grad_norm": 0.705662727355957, + "learning_rate": 7.149807375300239e-05, + "loss": 0.5823, + "step": 1110 + }, + { + "epoch": 1.784, + "grad_norm": 0.9632449150085449, + "learning_rate": 7.069607399149428e-05, + "loss": 0.7207, + "step": 1115 + }, + { + "epoch": 1.792, + "grad_norm": 0.6412242650985718, + "learning_rate": 6.989613088040796e-05, + "loss": 0.7006, + "step": 1120 + }, + { + "epoch": 1.8, + "grad_norm": 0.538044273853302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6222, + "step": 1125 + }, + { + "epoch": 1.808, + "grad_norm": 0.6124762296676636, + "learning_rate": 6.830263903226483e-05, + "loss": 0.6569, + "step": 1130 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.7250857949256897, + "learning_rate": 6.750920213195238e-05, + "loss": 0.543, + "step": 1135 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6840488910675049, + "learning_rate": 6.671804554770135e-05, + "loss": 0.6334, + "step": 1140 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.8932304382324219, + "learning_rate": 6.592922480560483e-05, + "loss": 0.6701, + "step": 1145 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.9220761656761169, + "learning_rate": 6.51427952678185e-05, + "loss": 0.6216, + "step": 1150 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.688731849193573, + "learning_rate": 6.435881212867493e-05, + "loss": 0.5877, + "step": 1155 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6370134949684143, + "learning_rate": 6.357733041081018e-05, + "loss": 0.6256, + "step": 1160 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.7421153783798218, + "learning_rate": 6.27984049613019e-05, + "loss": 0.7062, + "step": 1165 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.8129108548164368, + "learning_rate": 6.20220904478199e-05, + "loss": 0.6304, + "step": 1170 + }, + { + "epoch": 1.88, + "grad_norm": 0.7866687774658203, + "learning_rate": 6.12484413547897e-05, + "loss": 0.7695, + "step": 1175 + }, + { + "epoch": 1.888, + "grad_norm": 0.8307198882102966, + "learning_rate": 6.047751197956838e-05, + "loss": 0.5723, + "step": 1180 + }, + { + "epoch": 1.896, + "grad_norm": 0.5541536211967468, + "learning_rate": 5.9709356428633746e-05, + "loss": 0.6847, + "step": 1185 + }, + { + "epoch": 1.904, + "grad_norm": 0.731555163860321, + "learning_rate": 5.8944028613787206e-05, + "loss": 0.6618, + "step": 1190 + }, + { + "epoch": 1.912, + "grad_norm": 1.020991325378418, + "learning_rate": 5.818158224836987e-05, + "loss": 0.6275, + "step": 1195 + }, + { + "epoch": 1.92, + "grad_norm": 0.7172287702560425, + "learning_rate": 5.7422070843492734e-05, + "loss": 0.5617, + "step": 1200 + }, + { + "epoch": 1.928, + "grad_norm": 0.7232096791267395, + "learning_rate": 5.666554770428129e-05, + "loss": 0.6278, + "step": 1205 + }, + { + "epoch": 1.936, + "grad_norm": 0.7234194278717041, + "learning_rate": 5.591206592613416e-05, + "loss": 0.6713, + "step": 1210 + }, + { + "epoch": 1.944, + "grad_norm": 0.6906010508537292, + "learning_rate": 5.5161678390996796e-05, + "loss": 0.6113, + "step": 1215 + }, + { + "epoch": 1.952, + "grad_norm": 0.9057906270027161, + "learning_rate": 5.441443776365003e-05, + "loss": 0.5587, + "step": 1220 + }, + { + "epoch": 1.96, + "grad_norm": 0.7144932150840759, + "learning_rate": 5.3670396488013854e-05, + "loss": 0.5601, + "step": 1225 + }, + { + "epoch": 1.968, + "grad_norm": 1.0071252584457397, + "learning_rate": 5.292960678346675e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 1.976, + "grad_norm": 0.7061260342597961, + "learning_rate": 5.2192120641180786e-05, + "loss": 0.6285, + "step": 1235 + }, + { + "epoch": 1.984, + "grad_norm": 0.6937069892883301, + "learning_rate": 5.145798982047261e-05, + "loss": 0.6516, + "step": 1240 + }, + { + "epoch": 1.992, + "grad_norm": 0.5176392793655396, + "learning_rate": 5.072726584517086e-05, + "loss": 0.5904, + "step": 1245 + }, + { + "epoch": 2.0, + "grad_norm": 0.8501409292221069, + "learning_rate": 5.000000000000002e-05, + "loss": 0.619, + "step": 1250 + }, + { + "epoch": 2.008, + "grad_norm": 0.5451085567474365, + "learning_rate": 4.927624332698109e-05, + "loss": 0.6058, + "step": 1255 + }, + { + "epoch": 2.016, + "grad_norm": 0.8410437107086182, + "learning_rate": 4.8556046621849346e-05, + "loss": 0.6248, + "step": 1260 + }, + { + "epoch": 2.024, + "grad_norm": 0.6089378595352173, + "learning_rate": 4.783946043048923e-05, + "loss": 0.5247, + "step": 1265 + }, + { + "epoch": 2.032, + "grad_norm": 0.47437986731529236, + "learning_rate": 4.712653504538683e-05, + "loss": 0.5439, + "step": 1270 + }, + { + "epoch": 2.04, + "grad_norm": 0.6925654411315918, + "learning_rate": 4.6417320502100316e-05, + "loss": 0.4491, + "step": 1275 + }, + { + "epoch": 2.048, + "grad_norm": 0.8538162708282471, + "learning_rate": 4.5711866575748276e-05, + "loss": 0.52, + "step": 1280 + }, + { + "epoch": 2.056, + "grad_norm": 0.8048768639564514, + "learning_rate": 4.501022277751602e-05, + "loss": 0.5259, + "step": 1285 + }, + { + "epoch": 2.064, + "grad_norm": 0.7191641330718994, + "learning_rate": 4.431243835118124e-05, + "loss": 0.5025, + "step": 1290 + }, + { + "epoch": 2.072, + "grad_norm": 1.0918892621994019, + "learning_rate": 4.361856226965733e-05, + "loss": 0.4772, + "step": 1295 + }, + { + "epoch": 2.08, + "grad_norm": 0.8152824640274048, + "learning_rate": 4.2928643231556844e-05, + "loss": 0.5945, + "step": 1300 + }, + { + "epoch": 2.088, + "grad_norm": 0.640073299407959, + "learning_rate": 4.224272965777326e-05, + "loss": 0.4813, + "step": 1305 + }, + { + "epoch": 2.096, + "grad_norm": 0.7172432541847229, + "learning_rate": 4.15608696880828e-05, + "loss": 0.5315, + "step": 1310 + }, + { + "epoch": 2.104, + "grad_norm": 1.1283674240112305, + "learning_rate": 4.08831111777658e-05, + "loss": 0.5591, + "step": 1315 + }, + { + "epoch": 2.112, + "grad_norm": 0.8184736967086792, + "learning_rate": 4.020950169424815e-05, + "loss": 0.605, + "step": 1320 + }, + { + "epoch": 2.12, + "grad_norm": 0.6823618412017822, + "learning_rate": 3.954008851376252e-05, + "loss": 0.4955, + "step": 1325 + }, + { + "epoch": 2.128, + "grad_norm": 0.8576385378837585, + "learning_rate": 3.887491861803085e-05, + "loss": 0.5757, + "step": 1330 + }, + { + "epoch": 2.136, + "grad_norm": 0.967835009098053, + "learning_rate": 3.821403869096658e-05, + "loss": 0.5313, + "step": 1335 + }, + { + "epoch": 2.144, + "grad_norm": 0.7330173254013062, + "learning_rate": 3.755749511539845e-05, + "loss": 0.5904, + "step": 1340 + }, + { + "epoch": 2.152, + "grad_norm": 0.6664792895317078, + "learning_rate": 3.690533396981504e-05, + "loss": 0.4679, + "step": 1345 + }, + { + "epoch": 2.16, + "grad_norm": 0.7639065980911255, + "learning_rate": 3.6257601025131026e-05, + "loss": 0.5235, + "step": 1350 + }, + { + "epoch": 2.168, + "grad_norm": 0.6960520148277283, + "learning_rate": 3.561434174147463e-05, + "loss": 0.5797, + "step": 1355 + }, + { + "epoch": 2.176, + "grad_norm": 0.8231356143951416, + "learning_rate": 3.497560126499709e-05, + "loss": 0.5772, + "step": 1360 + }, + { + "epoch": 2.184, + "grad_norm": 0.8968437910079956, + "learning_rate": 3.4341424424704375e-05, + "loss": 0.5316, + "step": 1365 + }, + { + "epoch": 2.192, + "grad_norm": 0.8037480711936951, + "learning_rate": 3.371185572931048e-05, + "loss": 0.5646, + "step": 1370 + }, + { + "epoch": 2.2, + "grad_norm": 0.9330148696899414, + "learning_rate": 3.308693936411421e-05, + "loss": 0.5431, + "step": 1375 + }, + { + "epoch": 2.208, + "grad_norm": 0.6958775520324707, + "learning_rate": 3.246671918789755e-05, + "loss": 0.5403, + "step": 1380 + }, + { + "epoch": 2.216, + "grad_norm": 0.9870476126670837, + "learning_rate": 3.1851238729848034e-05, + "loss": 0.5329, + "step": 1385 + }, + { + "epoch": 2.224, + "grad_norm": 0.5036590099334717, + "learning_rate": 3.124054118650327e-05, + "loss": 0.5696, + "step": 1390 + }, + { + "epoch": 2.232, + "grad_norm": 0.8640053868293762, + "learning_rate": 3.063466941871952e-05, + "loss": 0.59, + "step": 1395 + }, + { + "epoch": 2.24, + "grad_norm": 0.6065173149108887, + "learning_rate": 3.0033665948663448e-05, + "loss": 0.5116, + "step": 1400 + }, + { + "epoch": 2.248, + "grad_norm": 1.083775520324707, + "learning_rate": 2.9437572956827964e-05, + "loss": 0.5783, + "step": 1405 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.7090497016906738, + "learning_rate": 2.8846432279071467e-05, + "loss": 0.6259, + "step": 1410 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.742468535900116, + "learning_rate": 2.826028540368215e-05, + "loss": 0.5759, + "step": 1415 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.9219839572906494, + "learning_rate": 2.7679173468465812e-05, + "loss": 0.497, + "step": 1420 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.7159206867218018, + "learning_rate": 2.7103137257858868e-05, + "loss": 0.619, + "step": 1425 + }, + { + "epoch": 2.288, + "grad_norm": 0.6997727751731873, + "learning_rate": 2.6532217200065858e-05, + "loss": 0.5858, + "step": 1430 + }, + { + "epoch": 2.296, + "grad_norm": 0.7493643164634705, + "learning_rate": 2.5966453364222186e-05, + "loss": 0.6291, + "step": 1435 + }, + { + "epoch": 2.304, + "grad_norm": 0.8311699032783508, + "learning_rate": 2.540588545758179e-05, + "loss": 0.6418, + "step": 1440 + }, + { + "epoch": 2.312, + "grad_norm": 0.7084354758262634, + "learning_rate": 2.48505528227304e-05, + "loss": 0.5483, + "step": 1445 + }, + { + "epoch": 2.32, + "grad_norm": 0.734438955783844, + "learning_rate": 2.4300494434824373e-05, + "loss": 0.6071, + "step": 1450 + }, + { + "epoch": 2.328, + "grad_norm": 0.8913635015487671, + "learning_rate": 2.37557488988552e-05, + "loss": 0.5099, + "step": 1455 + }, + { + "epoch": 2.336, + "grad_norm": 0.8349048495292664, + "learning_rate": 2.321635444694028e-05, + "loss": 0.5186, + "step": 1460 + }, + { + "epoch": 2.344, + "grad_norm": 0.6164011359214783, + "learning_rate": 2.2682348935639274e-05, + "loss": 0.5043, + "step": 1465 + }, + { + "epoch": 2.352, + "grad_norm": 1.044892430305481, + "learning_rate": 2.2153769843297667e-05, + "loss": 0.61, + "step": 1470 + }, + { + "epoch": 2.36, + "grad_norm": 0.9142879247665405, + "learning_rate": 2.163065426741603e-05, + "loss": 0.5987, + "step": 1475 + }, + { + "epoch": 2.368, + "grad_norm": 0.6232836842536926, + "learning_rate": 2.1113038922046602e-05, + "loss": 0.5212, + "step": 1480 + }, + { + "epoch": 2.376, + "grad_norm": 0.49558231234550476, + "learning_rate": 2.0600960135216462e-05, + "loss": 0.4796, + "step": 1485 + }, + { + "epoch": 2.384, + "grad_norm": 0.7887687683105469, + "learning_rate": 2.009445384637805e-05, + "loss": 0.4844, + "step": 1490 + }, + { + "epoch": 2.392, + "grad_norm": 0.8086990714073181, + "learning_rate": 1.9593555603886538e-05, + "loss": 0.5085, + "step": 1495 + }, + { + "epoch": 2.4, + "grad_norm": 0.6713303327560425, + "learning_rate": 1.9098300562505266e-05, + "loss": 0.4839, + "step": 1500 + }, + { + "epoch": 2.408, + "grad_norm": 0.6262741684913635, + "learning_rate": 1.8608723480938206e-05, + "loss": 0.5715, + "step": 1505 + }, + { + "epoch": 2.416, + "grad_norm": 0.8025808334350586, + "learning_rate": 1.812485871939056e-05, + "loss": 0.5266, + "step": 1510 + }, + { + "epoch": 2.424, + "grad_norm": 0.8753231167793274, + "learning_rate": 1.7646740237157256e-05, + "loss": 0.5422, + "step": 1515 + }, + { + "epoch": 2.432, + "grad_norm": 0.6459301710128784, + "learning_rate": 1.7174401590239587e-05, + "loss": 0.5553, + "step": 1520 + }, + { + "epoch": 2.44, + "grad_norm": 0.6917416453361511, + "learning_rate": 1.6707875928990058e-05, + "loss": 0.5765, + "step": 1525 + }, + { + "epoch": 2.448, + "grad_norm": 0.7890029549598694, + "learning_rate": 1.6247195995785837e-05, + "loss": 0.549, + "step": 1530 + }, + { + "epoch": 2.456, + "grad_norm": 0.9913660883903503, + "learning_rate": 1.579239412273078e-05, + "loss": 0.4876, + "step": 1535 + }, + { + "epoch": 2.464, + "grad_norm": 0.9030985832214355, + "learning_rate": 1.5343502229386207e-05, + "loss": 0.5546, + "step": 1540 + }, + { + "epoch": 2.472, + "grad_norm": 0.9133403301239014, + "learning_rate": 1.4900551820530828e-05, + "loss": 0.5356, + "step": 1545 + }, + { + "epoch": 2.48, + "grad_norm": 0.7083793878555298, + "learning_rate": 1.4463573983949341e-05, + "loss": 0.5142, + "step": 1550 + }, + { + "epoch": 2.488, + "grad_norm": 1.095435619354248, + "learning_rate": 1.40325993882509e-05, + "loss": 0.6054, + "step": 1555 + }, + { + "epoch": 2.496, + "grad_norm": 0.8825190663337708, + "learning_rate": 1.3607658280716473e-05, + "loss": 0.5294, + "step": 1560 + }, + { + "epoch": 2.504, + "grad_norm": 0.9436343908309937, + "learning_rate": 1.3188780485176088e-05, + "loss": 0.5294, + "step": 1565 + }, + { + "epoch": 2.512, + "grad_norm": 1.0125439167022705, + "learning_rate": 1.2775995399915631e-05, + "loss": 0.4905, + "step": 1570 + }, + { + "epoch": 2.52, + "grad_norm": 0.8476350903511047, + "learning_rate": 1.2369331995613665e-05, + "loss": 0.5186, + "step": 1575 + }, + { + "epoch": 2.528, + "grad_norm": 0.9092681407928467, + "learning_rate": 1.196881881330798e-05, + "loss": 0.4909, + "step": 1580 + }, + { + "epoch": 2.536, + "grad_norm": 0.7970360517501831, + "learning_rate": 1.1574483962392767e-05, + "loss": 0.5303, + "step": 1585 + }, + { + "epoch": 2.544, + "grad_norm": 0.8575041890144348, + "learning_rate": 1.1186355118645554e-05, + "loss": 0.5169, + "step": 1590 + }, + { + "epoch": 2.552, + "grad_norm": 0.7397408485412598, + "learning_rate": 1.0804459522284926e-05, + "loss": 0.5339, + "step": 1595 + }, + { + "epoch": 2.56, + "grad_norm": 0.7415968179702759, + "learning_rate": 1.042882397605871e-05, + "loss": 0.5283, + "step": 1600 + }, + { + "epoch": 2.568, + "grad_norm": 0.7035180926322937, + "learning_rate": 1.0059474843362892e-05, + "loss": 0.5576, + "step": 1605 + }, + { + "epoch": 2.576, + "grad_norm": 0.9805112481117249, + "learning_rate": 9.696438046391288e-06, + "loss": 0.5136, + "step": 1610 + }, + { + "epoch": 2.584, + "grad_norm": 0.6661838889122009, + "learning_rate": 9.339739064316233e-06, + "loss": 0.5885, + "step": 1615 + }, + { + "epoch": 2.592, + "grad_norm": 0.8581559062004089, + "learning_rate": 8.989402931500434e-06, + "loss": 0.5, + "step": 1620 + }, + { + "epoch": 2.6, + "grad_norm": 0.7146279811859131, + "learning_rate": 8.645454235739903e-06, + "loss": 0.5325, + "step": 1625 + }, + { + "epoch": 2.608, + "grad_norm": 0.9474234580993652, + "learning_rate": 8.307917116538378e-06, + "loss": 0.5772, + "step": 1630 + }, + { + "epoch": 2.616, + "grad_norm": 0.9583209753036499, + "learning_rate": 7.976815263412963e-06, + "loss": 0.5736, + "step": 1635 + }, + { + "epoch": 2.624, + "grad_norm": 0.7156705260276794, + "learning_rate": 7.652171914231776e-06, + "loss": 0.5199, + "step": 1640 + }, + { + "epoch": 2.632, + "grad_norm": 0.8224849700927734, + "learning_rate": 7.3340098535827905e-06, + "loss": 0.5753, + "step": 1645 + }, + { + "epoch": 2.64, + "grad_norm": 0.8689257502555847, + "learning_rate": 7.022351411174866e-06, + "loss": 0.5424, + "step": 1650 + }, + { + "epoch": 2.648, + "grad_norm": 0.6636053323745728, + "learning_rate": 6.717218460270536e-06, + "loss": 0.5555, + "step": 1655 + }, + { + "epoch": 2.656, + "grad_norm": 0.8688860535621643, + "learning_rate": 6.418632416150927e-06, + "loss": 0.4936, + "step": 1660 + }, + { + "epoch": 2.664, + "grad_norm": 0.6272854208946228, + "learning_rate": 6.126614234612593e-06, + "loss": 0.6291, + "step": 1665 + }, + { + "epoch": 2.672, + "grad_norm": 1.2240337133407593, + "learning_rate": 5.8411844104969916e-06, + "loss": 0.5197, + "step": 1670 + }, + { + "epoch": 2.68, + "grad_norm": 0.9820936918258667, + "learning_rate": 5.562362976251901e-06, + "loss": 0.5398, + "step": 1675 + }, + { + "epoch": 2.6879999999999997, + "grad_norm": 1.1582359075546265, + "learning_rate": 5.290169500525577e-06, + "loss": 0.6059, + "step": 1680 + }, + { + "epoch": 2.6959999999999997, + "grad_norm": 0.5501114726066589, + "learning_rate": 5.024623086793323e-06, + "loss": 0.531, + "step": 1685 + }, + { + "epoch": 2.7039999999999997, + "grad_norm": 0.8848717212677002, + "learning_rate": 4.765742372016735e-06, + "loss": 0.6054, + "step": 1690 + }, + { + "epoch": 2.7119999999999997, + "grad_norm": 0.7358693480491638, + "learning_rate": 4.513545525335705e-06, + "loss": 0.5173, + "step": 1695 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 0.9218215942382812, + "learning_rate": 4.268050246793276e-06, + "loss": 0.4944, + "step": 1700 + }, + { + "epoch": 2.7279999999999998, + "grad_norm": 0.6374716758728027, + "learning_rate": 4.029273766093333e-06, + "loss": 0.5183, + "step": 1705 + }, + { + "epoch": 2.7359999999999998, + "grad_norm": 0.583243191242218, + "learning_rate": 3.797232841391407e-06, + "loss": 0.668, + "step": 1710 + }, + { + "epoch": 2.7439999999999998, + "grad_norm": 0.8384690284729004, + "learning_rate": 3.5719437581185454e-06, + "loss": 0.5068, + "step": 1715 + }, + { + "epoch": 2.752, + "grad_norm": 0.8034130334854126, + "learning_rate": 3.3534223278382405e-06, + "loss": 0.5823, + "step": 1720 + }, + { + "epoch": 2.76, + "grad_norm": 0.8146041631698608, + "learning_rate": 3.1416838871368924e-06, + "loss": 0.6111, + "step": 1725 + }, + { + "epoch": 2.768, + "grad_norm": 0.8122982382774353, + "learning_rate": 2.936743296547273e-06, + "loss": 0.5231, + "step": 1730 + }, + { + "epoch": 2.776, + "grad_norm": 0.7326982021331787, + "learning_rate": 2.738614939505646e-06, + "loss": 0.5236, + "step": 1735 + }, + { + "epoch": 2.784, + "grad_norm": 0.7472147345542908, + "learning_rate": 2.5473127213422763e-06, + "loss": 0.5657, + "step": 1740 + }, + { + "epoch": 2.792, + "grad_norm": 0.8197700381278992, + "learning_rate": 2.3628500683055222e-06, + "loss": 0.5518, + "step": 1745 + }, + { + "epoch": 2.8, + "grad_norm": 0.8733732104301453, + "learning_rate": 2.1852399266194314e-06, + "loss": 0.4908, + "step": 1750 + }, + { + "epoch": 2.808, + "grad_norm": 0.8913092017173767, + "learning_rate": 2.014494761575314e-06, + "loss": 0.5459, + "step": 1755 + }, + { + "epoch": 2.816, + "grad_norm": 1.1259772777557373, + "learning_rate": 1.8506265566567094e-06, + "loss": 0.5208, + "step": 1760 + }, + { + "epoch": 2.824, + "grad_norm": 0.7692184448242188, + "learning_rate": 1.6936468126984572e-06, + "loss": 0.5824, + "step": 1765 + }, + { + "epoch": 2.832, + "grad_norm": 0.588602602481842, + "learning_rate": 1.543566547079467e-06, + "loss": 0.5512, + "step": 1770 + }, + { + "epoch": 2.84, + "grad_norm": 0.6324055790901184, + "learning_rate": 1.400396292949513e-06, + "loss": 0.6327, + "step": 1775 + }, + { + "epoch": 2.848, + "grad_norm": 0.7608378529548645, + "learning_rate": 1.26414609848996e-06, + "loss": 0.5292, + "step": 1780 + }, + { + "epoch": 2.856, + "grad_norm": 0.7972851395606995, + "learning_rate": 1.134825526208605e-06, + "loss": 0.5692, + "step": 1785 + }, + { + "epoch": 2.864, + "grad_norm": 0.9705446362495422, + "learning_rate": 1.0124436522684243e-06, + "loss": 0.5532, + "step": 1790 + }, + { + "epoch": 2.872, + "grad_norm": 0.6317399144172668, + "learning_rate": 8.970090658507291e-07, + "loss": 0.5314, + "step": 1795 + }, + { + "epoch": 2.88, + "grad_norm": 0.6457757949829102, + "learning_rate": 7.885298685522235e-07, + "loss": 0.524, + "step": 1800 + }, + { + "epoch": 2.888, + "grad_norm": 0.8593656420707703, + "learning_rate": 6.870136738164612e-07, + "loss": 0.5227, + "step": 1805 + }, + { + "epoch": 2.896, + "grad_norm": 1.0187020301818848, + "learning_rate": 5.924676063995382e-07, + "loss": 0.5993, + "step": 1810 + }, + { + "epoch": 2.904, + "grad_norm": 0.7082214951515198, + "learning_rate": 5.048983018699827e-07, + "loss": 0.5618, + "step": 1815 + }, + { + "epoch": 2.912, + "grad_norm": 0.6521438956260681, + "learning_rate": 4.2431190614309335e-07, + "loss": 0.5504, + "step": 1820 + }, + { + "epoch": 2.92, + "grad_norm": 0.8906036615371704, + "learning_rate": 3.50714075049563e-07, + "loss": 0.5147, + "step": 1825 + }, + { + "epoch": 2.928, + "grad_norm": 1.0908008813858032, + "learning_rate": 2.841099739386066e-07, + "loss": 0.5564, + "step": 1830 + }, + { + "epoch": 2.936, + "grad_norm": 0.6374122500419617, + "learning_rate": 2.2450427731534053e-07, + "loss": 0.5188, + "step": 1835 + }, + { + "epoch": 2.944, + "grad_norm": 0.9616740345954895, + "learning_rate": 1.7190116851280026e-07, + "loss": 0.5438, + "step": 1840 + }, + { + "epoch": 2.952, + "grad_norm": 1.0712924003601074, + "learning_rate": 1.2630433939825327e-07, + "loss": 0.4962, + "step": 1845 + }, + { + "epoch": 2.96, + "grad_norm": 0.8226613998413086, + "learning_rate": 8.771699011416168e-08, + "loss": 0.5021, + "step": 1850 + }, + { + "epoch": 2.968, + "grad_norm": 0.9519492983818054, + "learning_rate": 5.6141828853573106e-08, + "loss": 0.5277, + "step": 1855 + }, + { + "epoch": 2.976, + "grad_norm": 0.9817518591880798, + "learning_rate": 3.1581071670006015e-08, + "loss": 0.5764, + "step": 1860 + }, + { + "epoch": 2.984, + "grad_norm": 0.7039242386817932, + "learning_rate": 1.4036442321962995e-08, + "loss": 0.5408, + "step": 1865 + }, + { + "epoch": 2.992, + "grad_norm": 0.591012179851532, + "learning_rate": 3.509172151938689e-09, + "loss": 0.5014, + "step": 1870 + }, + { + "epoch": 3.0, + "grad_norm": 0.7672661542892456, + "learning_rate": 0.0, + "loss": 0.6182, + "step": 1875 + }, + { + "epoch": 3.0, + "step": 1875, + "total_flos": 1.1764404625814323e+17, + "train_loss": 0.6465437274932861, + "train_runtime": 2365.2475, + "train_samples_per_second": 12.684, + "train_steps_per_second": 0.793 + } + ], + "logging_steps": 5, + "max_steps": 1875, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1764404625814323e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72392a9094690809605a9b89988a05f68c770c0 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6303cd7c1754cff6fd463f0e4dddf2a566ba7f29bbf2a72fbb560b32d5ce98 +size 5432 diff --git a/training_args.yaml b/training_args.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3531daa1b6021e6cd4149789c5764370e389fe0 --- /dev/null +++ b/training_args.yaml @@ -0,0 +1,33 @@ +bf16: true +cutoff_len: 1024 +dataset: mathinstruct +dataset_dir: data +ddp_timeout: 180000000 +do_train: true +finetuning_type: lora +flash_attn: auto +gradient_accumulation_steps: 1 +learning_rate: 0.0002 +logging_steps: 5 +lora_alpha: 16 +lora_dropout: 0 +lora_rank: 8 +lora_target: all +lr_scheduler_type: cosine +max_grad_norm: 1.0 +max_samples: 10000 +model_name_or_path: meta-llama/Llama-3.2-1B +num_train_epochs: 3.0 +optim: adamw_torch +output_dir: saves/Llama-3.2-1B/lora/llama3.2-1b +packing: false +per_device_train_batch_size: 16 +plot_loss: true +preprocessing_num_workers: 16 +report_to: none +rope_scaling: linear +save_steps: 100 +stage: sft +template: default +trust_remote_code: true +warmup_steps: 0 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..2b176a65172e62e3c82d61a73bf727642ee89055 Binary files /dev/null and b/training_loss.png differ