diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-10/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-10/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-10/adapter_model.safetensors b/checkpoint-10/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53be1ac30640ce613049427fe037a5e0e9dee2a9 --- /dev/null +++ b/checkpoint-10/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61fe345fcde39c53ad9da927d5d32fb1e3d32848a6d544a7cbfc30dc114c939c +size 609389712 diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4055414becccc4cdd2eacaf3c2a49a55e36390d0 --- /dev/null +++ b/checkpoint-10/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f15f5190c7f9355527151fd5422d3e115b32c265bfff8782c2280a4db20867d +size 43126684 diff --git a/checkpoint-10/rng_state.pth b/checkpoint-10/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..390173b53b89a78da38c01fe984bc7ea10e52f8c --- /dev/null +++ b/checkpoint-10/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:319b0ac84081c7cde39985f443e46d3f8b0a04617912b0f3cd6ba6b182f4a439 +size 14244 diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3b22e9b79ec7c55466a8d9d074ded14a755854c7 --- /dev/null +++ b/checkpoint-10/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb66a3937ad2f5bcb727b664c93e583feeff1fdd77d67933d257e85a9a6aea0c +size 1064 diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eb7f0c864b17036ff7674749938c0a78b341ce5a --- /dev/null +++ b/checkpoint-10/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.19230769230769232, + "eval_steps": 10, + "global_step": 10, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 438185188392960.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-10/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
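(The card's own getting-started example is still a placeholder below. In the meantime, here is a minimal loading sketch consistent with the `bitsandbytes` settings recorded in this card; the checkpoint path is illustrative, and any of the `checkpoint-*` directories in this diff would work the same way.)

```python
# Hedged sketch: load the 4-bit-quantized base model and attach a LoRA
# adapter from one of the checkpoint directories in this diff.
# Assumes transformers, peft, and bitsandbytes are installed; the
# "checkpoint-100" path is illustrative.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "ybelkada/Mistral-7B-v0.1-bf16-sharded"

# Mirrors the quantization config listed under "Training procedure" below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)

# Attach the LoRA adapter saved in the checkpoint directory.
model = PeftModel.from_pretrained(base_model, "checkpoint-100")
model.eval()

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```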
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9328ca6fdfb9d52859cffd2a172e874bf62103b --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dd01c7dbf4acfdb2345d6aaa73265f7809d2fe28d371141536b2c76117725ca3 +size 609389712 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc8bce0038d11b193cf29ada7cb106a25d31d735 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5ed83eea962bb89e7e7f3f010456bfb28394bc1d8adf99a22f45a858778e24 +size 43126684 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..51e43827d67012834d588e77fbabff1dd726df16 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84758f1c2b3318c5cad1ab14ce22b7bac97d0c5c7fe0356290b9ec5a54c3b304 +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72e6de0519f5b4b103b9859282ca0849af527615 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9199f8b2981cc2bd25ef6f293bbae160e920093cc7ed02d042b4e2fce6e9d3dc +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a9c368cae11cf84ab0473d15405202978a390458 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,160 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9230769230769231, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + }, + { + "epoch": 0.77, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.3506, + "step": 40 + }, + { + "epoch": 0.77, + "eval_loss": 0.3160565197467804, + "eval_runtime": 163.8993, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 40 + }, + { + "epoch": 0.96, + "learning_rate": 1.2626262626262628e-05, + "loss": 0.3133, + "step": 50 + }, + { + "epoch": 0.96, + "eval_loss": 0.3000437915325165, + "eval_runtime": 163.2552, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 50 + }, + { + "epoch": 1.15, + "learning_rate": 1.0101010101010101e-05, + "loss": 0.3185, + "step": 60 + }, + { + "epoch": 1.15, + "eval_loss": 0.2911369800567627, + "eval_runtime": 162.9849, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 60 + }, + { + "epoch": 1.35, + "learning_rate": 7.5757575757575764e-06, + "loss": 0.2703, + "step": 70 + }, + { + "epoch": 1.35, + "eval_loss": 0.2851818799972534, + "eval_runtime": 
163.5768, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 70 + }, + { + "epoch": 1.54, + "learning_rate": 5.050505050505051e-06, + "loss": 0.2451, + "step": 80 + }, + { + "epoch": 1.54, + "eval_loss": 0.27840811014175415, + "eval_runtime": 163.9369, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 80 + }, + { + "epoch": 1.73, + "learning_rate": 2.5252525252525253e-06, + "loss": 0.2702, + "step": 90 + }, + { + "epoch": 1.73, + "eval_loss": 0.2748894989490509, + "eval_runtime": 162.9364, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 90 + }, + { + "epoch": 1.92, + "learning_rate": 0.0, + "loss": 0.2489, + "step": 100 + }, + { + "epoch": 1.92, + "eval_loss": 0.27365821599960327, + "eval_runtime": 164.1174, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 4359942624509952.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
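(This placeholder is shared by every checkpoint card; a hedged loading sketch appears after the checkpoint-100 card above.) Separately, each checkpoint's `trainer_state.json` — such as the checkpoint-100 one just above, which shows eval loss falling from 1.38 at step 10 to 0.27 at step 100 — records train and eval loss every 10 steps. A small sketch for reading that trajectory, assuming the file layout shown in this diff:

```python
# Sketch: summarize the loss curve logged in a checkpoint's trainer_state.json.
# The path is illustrative; any checkpoint-*/trainer_state.json in this diff fits.
import json

with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

# log_history interleaves training entries ("loss") and eval entries ("eval_loss").
for entry in state["log_history"]:
    if "loss" in entry:
        print(f"step {entry['step']:>3}: train loss {entry['loss']:.4f}")
    elif "eval_loss" in entry:
        print(f"step {entry['step']:>3}: eval loss  {entry['eval_loss']:.4f}")
```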
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ff1175f9e69a646872cd9456877735c74c2b9c8 --- /dev/null +++ b/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ebd7c154c608dd256102999dae2a02269fb581ae6a34fdc258ae27cab1a84263 +size 609389712 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c8dc6e7440498a7483953934b81065243848ec1 --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db400651a496423620b2e0d7f8f4e61108ad2d53a032f5d8b56435754ebedb6 +size 43126684 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ffccded36626280a8f82442b348c84d03e44ff2b --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3192ae602bff93d9873357b7c3242a56ce1180c7dec1ec698a898bc2dca8d298 +size 14244 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b2aa120f69539606bb996c0e32db55e259c47b1 --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3bbb5497877a8105ebc18ac7f4c4240b0268dcc17427b41b81c70ebfcd195c +size 1064 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..91d40041f85c8e5998559b149ccbd224dc18d52e --- /dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,48 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.38461538461538464, + "eval_steps": 10, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 876370376785920.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-30/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More 
Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-30/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + 
"init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3e0a011f29d8e7b238bd9180b38880fa04845a6 --- /dev/null +++ b/checkpoint-30/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff79ade8f0cbb984fe662c6d30731b02fe9bafe8498350f48a9462f00db9756 +size 609389712 diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..aae2b1348f4978c2db060d9f8c5aa103c8025e8d --- /dev/null +++ b/checkpoint-30/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e70453a4587959468c5e2c2c9d0fad58cb2d296afd91ae07b2d2e8230fece7a +size 43126684 diff --git a/checkpoint-30/rng_state.pth b/checkpoint-30/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba86e974df8a6d9e34f05d2c9a8babb8fad83bf3 --- /dev/null +++ b/checkpoint-30/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331cde27f5c37368971942801503df6a56165c6e11d9203179910222f1ca8b38 +size 14244 diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..57d5053dd06adc616f29b5a318a0d1539612483e --- /dev/null +++ b/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4660b73a9a6ccbbdccf2827928238cb28f8e16476c815698434ac01013b89d5b +size 1064 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..20f07ba223f91babfa92d08a994fbde551a16185 --- /dev/null +++ b/checkpoint-30/trainer_state.json @@ -0,0 +1,62 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5769230769230769, + "eval_steps": 10, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 1314555565178880.0, + "trial_name": null, + "trial_params": null +} diff --git 
a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
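An aside on the configuration files in this diff: each checkpoint's `adapter_config.json` maps field for field onto a `peft.LoraConfig`. The original training script is not included here, but the LoRA configuration it used must have looked roughly like this reconstruction:

```python
# Reconstruction of the LoRA configuration implied by adapter_config.json;
# the actual training script is not part of this diff.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "k_proj", "o_proj", "lm_head", "v_proj",
        "q_proj", "up_proj", "down_proj", "gate_proj",
    ],
)
```

Note that the target modules cover all attention and MLP projections plus `lm_head`, which accounts for the relatively large 609 MB adapter files despite the low rank of 8.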
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8edeceb05d1e42255675386afa964128bd046bc8 --- /dev/null +++ b/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99def190a2d3d1207165019cd257fbc0996fa81cb91a5227e0e85e7e7a3b2fa6 +size 609389712 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c228c501ce0affbafbb38efe8352454ecf61337 --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce362b857de77d5afbabef8b688971031ab7f1fb5aae2142fb7fd721a063352f +size 43126684 diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0b974c6785406ea821aceac1c8e224bb2e34a4f --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a27082fe9f0964489bac5f615981f33a2cd1258c8a541c1d952ac50fc31944 +size 14244 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f4332c97c072f841bf114f798afc6f7f139b9195 --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad869be6ea152c57cf0213fc18955a4157bd155031ff1d60806177094bee090 +size 1064 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b5b9e7f3e02c4fd4d4bbac13cd67cca367f8609e --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,76 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7692307692307693, + "eval_steps": 10, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + }, + { + "epoch": 0.77, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.3506, + "step": 40 + }, + { + "epoch": 0.77, + "eval_loss": 0.3160565197467804, + "eval_runtime": 163.8993, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 40 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 1752740753571840.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information 
Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": 
"CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e51f0dda3c3d948f7f09aa9ae995900e7463373 --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e47cbd61ac8b4fc6a4d8b1903b21750f7159806c4667eb6daee21fe663769a +size 609389712 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7ea32bb706c248dd4da526007125cd018d98c76 --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d235e19af1a14150d2d14f57e709c81de076f732926cac6a8fd909e1223b7d19 +size 43126684 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2252a242251b99438326f19a11ef2ffc01037c2c --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5242faacf1d7bfe4f67a7d908ad04417fb6ac237f77997d5c3eb1749e1ab9876 +size 14244 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e76c6001030451b077ebfd1226d136156c9d7a9 --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c110ed2fc1c5f1438131eebf89c151ddb8241b19b640ba43d3eced14f2317d89 +size 1064 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0df195a9163bedb694eadfbffb9bc053fd2711da --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,90 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9615384615384616, + "eval_steps": 10, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + }, + { + "epoch": 0.77, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.3506, + "step": 40 + }, + { + "epoch": 0.77, + "eval_loss": 0.3160565197467804, + "eval_runtime": 163.8993, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 40 + }, + { + "epoch": 0.96, + "learning_rate": 1.2626262626262628e-05, + "loss": 0.3133, + "step": 50 + }, + { + "epoch": 0.96, + "eval_loss": 0.3000437915325165, + "eval_runtime": 163.2552, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 50 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + 
"num_train_epochs": 2, + "save_steps": 10, + "total_flos": 2190925941964800.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfd7d6ff59d73cd92a1bd95942b8acee8d76c259 --- /dev/null +++ b/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a7a8a9ce1db5765af15d240a8d9fb5560536218071bb2eb3b25815a8b93f24 +size 609389712 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..18247fac6e1d301f6e279adb02d5bfdd8fd7a96c --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb6331bbbe6dcbe6d4a2da296b8c16068f68a2656c4fef2056eef81378634f5 +size 43126684 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ace3621ab65a981fc112d0dbce8892db8e947b9d --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7da9fba99d6ab4379024ba3fef59cc6cdd9f1cb884b5e6028f51df2fb4d334 +size 14244 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..261b1d525536103fe224b195610788f686a1dbf5 --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccfee5ab0d0b106f5bb26dbdf439d4f9806b8904243ebabdb84fbd3c9e2cb82 +size 1064 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..234d155c1239250526443cbae1a62b22c701d12e --- /dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,104 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1538461538461537, + "eval_steps": 10, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + }, + { + "epoch": 0.77, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.3506, + "step": 40 + }, + { + "epoch": 0.77, + "eval_loss": 0.3160565197467804, + "eval_runtime": 163.8993, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 40 + }, + { + "epoch": 0.96, + "learning_rate": 1.2626262626262628e-05, + "loss": 0.3133, + "step": 50 + }, + { + "epoch": 0.96, + "eval_loss": 0.3000437915325165, + "eval_runtime": 163.2552, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 50 + }, + { + "epoch": 1.15, + "learning_rate": 1.0101010101010101e-05, + "loss": 0.3185, + "step": 60 + }, + { + "epoch": 1.15, + "eval_loss": 0.2911369800567627, + "eval_runtime": 162.9849, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 60 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 2607201870938112.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-70/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** 
[More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-70/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70/adapter_model.safetensors b/checkpoint-70/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..560211b52747b1951997cc40ce0d715197d61474 --- /dev/null +++ b/checkpoint-70/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0d0dcae91946f0ed9ab4826b2526285a85ba98d7609a5df528bc850c997d16 +size 609389712 diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..878361460d6d709d8424d8570f98ac4b5de5fd58 --- /dev/null +++ b/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:945caf4b1b3239182707969f3ec49152b4b9bdaf7a55d6340903fe18f5af58ba +size 43126684 diff --git a/checkpoint-70/rng_state.pth b/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f373ab065747486b14bb1f2bff342ac5a5c1dc4b --- /dev/null +++ b/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c61b04c230c2392930ec05d9817f9100c1bff1c6501cca2cb92006307b3637c +size 14244 diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f52cb8ed6fea8797b23368cd563f180da4f86b92 --- /dev/null +++ b/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2afdfdeca76ce866f8aaecb941fc301db96e838e9c7f416400c14358b246e42a +size 1064 diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d71669b4f448175c5adc7fa47834ce0634ace57d --- /dev/null +++ b/checkpoint-70/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3461538461538463, + "eval_steps": 10, + "global_step": 70, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + }, + { + "epoch": 0.77, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.3506, + "step": 40 + }, + { + "epoch": 0.77, + "eval_loss": 0.3160565197467804, + "eval_runtime": 163.8993, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 40 + }, + { + "epoch": 0.96, + "learning_rate": 1.2626262626262628e-05, + "loss": 0.3133, + "step": 50 + }, + { + "epoch": 0.96, + "eval_loss": 0.3000437915325165, + "eval_runtime": 163.2552, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 50 + }, + { + "epoch": 1.15, + "learning_rate": 1.0101010101010101e-05, + "loss": 0.3185, + "step": 60 + }, + { + "epoch": 1.15, + "eval_loss": 0.2911369800567627, + "eval_runtime": 162.9849, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 60 + }, + { + "epoch": 1.35, + "learning_rate": 7.5757575757575764e-06, + "loss": 0.2703, + "step": 70 + }, + { + "epoch": 1.35, + "eval_loss": 0.2851818799972534, + "eval_runtime": 163.5768, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 70 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 3045387059331072.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600 diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-80/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft 
+base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-80/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c90fddcb4ca28c393590a385475672dc2cee7ff --- /dev/null +++ b/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a5c1cde34d9020cf7098b67ceba6cd4c5e6c9417d9fafdbc84fad7b4e9f178 +size 609389712 diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..01d71b2a3f4cd8b07439113c80324222b7021ed4 --- /dev/null +++ b/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59cc07105829db5a80e966aad8b19a50b9e46eed8b2e270cb39c34012885d6e +size 43126684 diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b51194147db84ee36d937e9ffadcece9f7241db6 --- /dev/null +++ b/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a880836fed89a44c75f6da3b72a83da65783a0ef80c2b6bdc58a36431a7ffc58 +size 14244 diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5fa1cbc578bf25cc571dee18a7059e48cd23c5fc --- /dev/null +++ b/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0255d19cc2d0791fe82d74ed6c3ad13e4ab6da9fd629fe1d852817a464c09b +size 1064 diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..34871df0574ac46e9100872a1434c04f09f909e9 --- /dev/null +++ b/checkpoint-80/trainer_state.json @@ -0,0 +1,132 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5384615384615383, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + }, + { + "epoch": 0.77, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.3506, + "step": 40 + }, + { + "epoch": 0.77, + "eval_loss": 0.3160565197467804, + "eval_runtime": 163.8993, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 40 + }, + { + "epoch": 0.96, + "learning_rate": 1.2626262626262628e-05, + "loss": 0.3133, + "step": 50 + }, + { + "epoch": 0.96, + "eval_loss": 0.3000437915325165, + "eval_runtime": 163.2552, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 50 + }, + { + "epoch": 1.15, + "learning_rate": 1.0101010101010101e-05, + "loss": 0.3185, + "step": 60 + }, + { + "epoch": 1.15, + "eval_loss": 0.2911369800567627, + "eval_runtime": 162.9849, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 60 + }, + { + "epoch": 1.35, + "learning_rate": 7.5757575757575764e-06, + "loss": 0.2703, + "step": 70 + }, + { + "epoch": 1.35, + "eval_loss": 0.2851818799972534, + "eval_runtime": 163.5768, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 70 + }, + { + "epoch": 1.54, + "learning_rate": 5.050505050505051e-06, + "loss": 0.2451, + "step": 80 + }, + { + "epoch": 1.54, + "eval_loss": 0.27840811014175415, + "eval_runtime": 163.9369, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 80 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 3483572247724032.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 
4600 diff --git a/checkpoint-90/README.md b/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c644dc0050293b428fdc45a928acfadeace0ae7 --- /dev/null +++ b/checkpoint-90/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: ybelkada/Mistral-7B-v0.1-bf16-sharded +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.6.3.dev0 \ No newline at end of file diff --git a/checkpoint-90/adapter_config.json b/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98beeb019e9c43b6a8655b280b604de3df39e2e2 --- /dev/null +++ b/checkpoint-90/adapter_config.json @@ -0,0 +1,30 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "ybelkada/Mistral-7B-v0.1-bf16-sharded", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "lm_head", + "v_proj", + "q_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-90/adapter_model.safetensors b/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c329a67933c2dd520d7f316b6c13eb63a80c6d1 --- /dev/null +++ b/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aada75fb05943364dd8d0de28b5924ad30e91ab482a67f3d042f5fe3eff8aff +size 609389712 diff --git a/checkpoint-90/optimizer.pt b/checkpoint-90/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7eca7c96c1b9d93d4891f891c772ab0b41ae8953 --- /dev/null +++ b/checkpoint-90/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04ef82186b7b6f101009773449ec437215ba579ec45d6faa5199306215e516b +size 43126684 diff --git a/checkpoint-90/rng_state.pth b/checkpoint-90/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c2707bead89b4ef3485c1710909b665c1819d86 --- /dev/null +++ b/checkpoint-90/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ef44b61e8b472485e391fa8e13e033ea94d2ed11bf58e6b7553404586f81be +size 14244 diff --git a/checkpoint-90/scheduler.pt b/checkpoint-90/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f5b0e3b69a18ce50fea5fac942f0389d48730512 --- /dev/null +++ b/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e09836f3662486385ddcbd74aa6763671910b2c212db9dc0666706dba1237c +size 1064 diff --git a/checkpoint-90/trainer_state.json b/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2e790664df55facaedf5badcd96c818d3d4c26d4 --- /dev/null +++ b/checkpoint-90/trainer_state.json @@ -0,0 +1,146 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7307692307692308, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19, + "learning_rate": 2.272727272727273e-05, + "loss": 1.7377, + "step": 10 + }, + { + "epoch": 0.19, + "eval_loss": 1.3826087713241577, + "eval_runtime": 164.229, + "eval_samples_per_second": 0.304, + "eval_steps_per_second": 0.043, + "step": 10 + }, + { + "epoch": 0.38, + "learning_rate": 2.0202020202020203e-05, + "loss": 1.0247, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 0.6683715581893921, + "eval_runtime": 163.0119, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 20 + }, + { + "epoch": 0.58, + "learning_rate": 1.7676767676767676e-05, + "loss": 0.4914, + "step": 30 + }, + { + "epoch": 0.58, + "eval_loss": 0.34242257475852966, + "eval_runtime": 164.0724, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 30 + }, + { + "epoch": 0.77, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.3506, + "step": 40 + }, + { + "epoch": 0.77, + "eval_loss": 0.3160565197467804, + "eval_runtime": 163.8993, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 40 + }, + { + "epoch": 0.96, + "learning_rate": 1.2626262626262628e-05, + "loss": 0.3133, + "step": 50 + }, + { + "epoch": 0.96, + "eval_loss": 0.3000437915325165, + "eval_runtime": 163.2552, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 50 + }, + { + "epoch": 1.15, + "learning_rate": 1.0101010101010101e-05, + "loss": 0.3185, + "step": 60 + }, + { + "epoch": 1.15, + "eval_loss": 0.2911369800567627, + "eval_runtime": 162.9849, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 60 + }, + { + "epoch": 1.35, + "learning_rate": 7.5757575757575764e-06, + "loss": 0.2703, + "step": 70 + }, + { + "epoch": 1.35, + "eval_loss": 0.2851818799972534, + "eval_runtime": 163.5768, + "eval_samples_per_second": 0.306, + "eval_steps_per_second": 0.043, + "step": 70 + }, + { + "epoch": 1.54, + "learning_rate": 5.050505050505051e-06, + "loss": 0.2451, + "step": 80 + }, + { + "epoch": 1.54, + "eval_loss": 0.27840811014175415, + "eval_runtime": 163.9369, + "eval_samples_per_second": 0.305, + "eval_steps_per_second": 0.043, + "step": 80 + }, + { + "epoch": 1.73, + "learning_rate": 2.5252525252525253e-06, + "loss": 0.2702, + "step": 90 + }, + { + "epoch": 1.73, + "eval_loss": 0.2748894989490509, + "eval_runtime": 162.9364, + "eval_samples_per_second": 0.307, + "eval_steps_per_second": 0.043, + "step": 90 + } + ], + "logging_steps": 10, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 10, + "total_flos": 3921757436116992.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-90/training_args.bin b/checkpoint-90/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..cb63784509221847be48d014818210f672680633 --- /dev/null +++ b/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f84457fcf5aef029a467534eb875435a77ddb3edb5e81e398d5b46ff0577c1 +size 4600
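
The model cards saved with these checkpoints leave the "How to Get Started with the Model" section as a placeholder. Below is a minimal loading sketch, not the original authors' code: it assumes recent `transformers`, `peft`, and `bitsandbytes` releases and a local `checkpoint-90` directory (any of the saved checkpoint directories would work), and it mirrors the 4-bit NF4 quantization config recorded in each README.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# 4-bit NF4 quantization, matching the bitsandbytes config recorded in the READMEs
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    "ybelkada/Mistral-7B-v0.1-bf16-sharded",
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("ybelkada/Mistral-7B-v0.1-bf16-sharded")

# Attach the LoRA adapter weights from a saved checkpoint directory
model = PeftModel.from_pretrained(base_model, "checkpoint-90")
model.eval()

prompt = "The quick brown fox"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```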
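The `adapter_config.json` stored in every checkpoint maps directly onto a `peft` `LoraConfig`, which is how the adapter would be reconstructed to resume or repeat the fine-tune. The sketch below is an illustration assembled from the recorded values, not the original training script:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,               # LoRA rank, as recorded in adapter_config.json
    lora_alpha=16,     # scaling factor
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[   # every attention and MLP projection, plus lm_head
        "k_proj", "o_proj", "lm_head", "v_proj",
        "q_proj", "up_proj", "down_proj", "gate_proj",
    ],
)
```

Targeting all of the block's linear projections at rank 8 keeps only a small fraction of the 7B base model's parameters trainable while still adapting every layer.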
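Each `trainer_state.json` carries the full loss history up to that checkpoint, so training progress can be inspected without loading any weights. A small sketch, assuming the checkpoint directories sit in the working directory:

```python
import json

with open("checkpoint-90/trainer_state.json") as f:
    state = json.load(f)

# log_history interleaves training-loss and eval entries; keep the eval ones
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']:>3}  epoch {entry['epoch']:.2f}  "
              f"eval_loss {entry['eval_loss']:.4f}")
```

The logs show eval loss falling steadily from 1.383 at step 10 to 0.275 at step 90, with most of the improvement inside the first epoch. The logged learning rates are consistent with a linear decay from roughly 2.5e-5 to zero over the 100 `max_steps` (each logged value equals 2.5e-5 * (100 - step) / 99), though the exact scheduler settings live in the binary `training_args.bin` and are not confirmed here.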