diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..363fcab7ed6e9634e198cf5555ceb88932c9a245 --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..56ed1c5c7239e2dc207025810bfa1eba25ee586d --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "127668ee-44f0-4438-9337-e7c4a486aea3", + "metadata": {}, + "outputs": [], + "source": [ + "from huggingface_hub import HfApi\n", + "\n", + "api = HfApi()\n", + "\n", + "# Upload all the content from the local folder to your remote Space.\n", + "# By default, files are uploaded at the root of the repo\n", + "\n", + "api.upload_folder(\n", + "\n", + " folder_path=r\"C:\\dataset\\New folder\",\n", + "\n", + " repo_id=\"MinervaAI/Random-roleplay-instruction\",\n", + "\n", + " repo_type=\"dataset\",\n", + "\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7646c3b0f63338a5c87f9f689809311fb57d15 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": false, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..9c16aa4be022f03ad001b006fba14dfb73a1929c --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,3 @@ +{ + "": 32000 +} diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5a1a8e25fab70af1859fa3c79fcd46ce9c8ff92f --- /dev/null +++ b/checkpoint-10/README.md @@ -0,0 +1,34 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- 
llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + +- PEFT 0.6.0.dev0 + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-10/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-10/adapter_model.bin b/checkpoint-10/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad502817425d5cbd7f2db7c5f80f3d34e4aa3c09 --- /dev/null +++ b/checkpoint-10/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d73dd5f0590e7395a03feab2c12262e8144fdb10f84d6cc08f5c2b521c7d832 +size 62788109 diff --git a/checkpoint-10/adapter_model/README.md b/checkpoint-10/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5a1a8e25fab70af1859fa3c79fcd46ce9c8ff92f --- /dev/null +++ b/checkpoint-10/adapter_model/README.md @@ -0,0 +1,34 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + +- PEFT 0.6.0.dev0 + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-10/adapter_model/adapter_config.json b/checkpoint-10/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-10/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-10/adapter_model/adapter_model.bin b/checkpoint-10/adapter_model/adapter_model.bin new file mode 100644 index 
0000000000000000000000000000000000000000..ad502817425d5cbd7f2db7c5f80f3d34e4aa3c09 --- /dev/null +++ b/checkpoint-10/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d73dd5f0590e7395a03feab2c12262e8144fdb10f84d6cc08f5c2b521c7d832 +size 62788109 diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d3b14bc6d88e74a9c278965db08c3f4531c8856 --- /dev/null +++ b/checkpoint-10/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fc15492ebf7acd0d7da2effdc336db5cf94da3d033e4bfcba017a5ba9ff578 +size 250681597 diff --git a/checkpoint-10/rng_state.pth b/checkpoint-10/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3941b167aeed93112d0719c958b58e72f2ec8bd2 --- /dev/null +++ b/checkpoint-10/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d0d39ee3f495298b1364d1a694b15161510d7f6b4a57e4ad295a4590655a19 +size 14575 diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..19b0f5c3b6b112717270d933dc00cea683e16262 --- /dev/null +++ b/checkpoint-10/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f23bc239cbe3e7a4f862f2448a71aa49fc3b361c6c5866cc00242a779c0be4 +size 627 diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5cd9682d5a75a3cf56fb5989028c9dc62ed1d503 --- /dev/null +++ b/checkpoint-10/trainer_state.json @@ -0,0 +1,87 @@ +{ + "best_metric": 2.098437547683716, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-10", + "epoch": 0.25848142164781907, + "eval_steps": 10, + "global_step": 10, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 4.582587092041728e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-10/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff 
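
At this point checkpoint-10 is complete: a README recording the bitsandbytes settings, the LoRA `adapter_config.json`, LFS pointers for the adapter and optimizer, `trainer_state.json`, and `training_args.bin`. For reference, a minimal sketch of loading the `NousResearch/Llama-2-13b-hf` base model with the same quantization the README records; the `BitsAndBytesConfig` fields mirror the README, while `device_map="auto"` is an assumption, not something the diff specifies:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirror the quantization config recorded in checkpoint-10/README.md:
# 4-bit NF4 quantization, double quantization, bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-13b-hf",
    quantization_config=bnb_config,
    device_map="auto",  # assumption: let accelerate choose device placement
)
```
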
--git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.bin b/checkpoint-20/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7615732f2ddc126ab3490324549af7e959adc804 --- /dev/null +++ b/checkpoint-20/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d0c38222a997a52bfe76857917321fb2bc8d10553b4da4ff8390643c25f962 +size 62788109 diff --git a/checkpoint-20/adapter_model/README.md b/checkpoint-20/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-20/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-20/adapter_model/adapter_config.json b/checkpoint-20/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-20/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git 
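
Every checkpoint carries the same `adapter_config.json`: LoRA with r=8, alpha=16, dropout 0.05, targeting all seven Llama-2 projection matrices. A hedged sketch of attaching one of these adapters to the 4-bit base model from the sketch above, assuming the checkpoint directory has been downloaded locally:

```python
from peft import PeftModel

# Attach the checkpoint-20 LoRA adapter to the quantized base model.
# "checkpoint-20" is assumed to be a local copy of the directory in this
# diff; adapter_config.json tells PEFT how adapter_model.bin maps onto
# the base model's q/k/v/o/gate/up/down projections.
model = PeftModel.from_pretrained(base_model, "checkpoint-20")
model.eval()  # the saved config has inference_mode: true
```
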
a/checkpoint-20/adapter_model/adapter_model.bin b/checkpoint-20/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7615732f2ddc126ab3490324549af7e959adc804 --- /dev/null +++ b/checkpoint-20/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d0c38222a997a52bfe76857917321fb2bc8d10553b4da4ff8390643c25f962 +size 62788109 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a7f282f053f06ee4b0703c9ff8634995fe830bf --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56e8c6c5c1d0e07913575e95580988f8c4d4d451e12e22619c79c5df6b48dc53 +size 250681597 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f314eb61d07ca624a4eb51cab0bec85c72aab6d1 --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf66b0a499885f833314015600150cb6bfa74e1505e5608890c7c4ba655a6ba +size 14575 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75b0bacc84680716c87f31c95924d0e103e050af --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64bd2871a807ae2da11c5073d611b4d4223c336499ef21e9e856aaa448a1a35 +size 627 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f37a8a2dd39940b76a609b63a172eaff92ff7b55 --- /dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,155 @@ +{ + "best_metric": 2.066981077194214, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-20", + "epoch": 0.5169628432956381, + "eval_steps": 10, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + 
"learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 9.195307914756096e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-30/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-30/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model.bin b/checkpoint-30/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..60e8958adfb7d66a5df857f7f3279b8f60779563 --- /dev/null +++ b/checkpoint-30/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4276aa46f33d393edc2d19308e186a1f07580d76eddb0bb6339bc4f3e80b9f58 +size 62788109 diff --git a/checkpoint-30/adapter_model/README.md b/checkpoint-30/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-30/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- 
llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-30/adapter_model/adapter_config.json b/checkpoint-30/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-30/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model/adapter_model.bin b/checkpoint-30/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..60e8958adfb7d66a5df857f7f3279b8f60779563 --- /dev/null +++ b/checkpoint-30/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4276aa46f33d393edc2d19308e186a1f07580d76eddb0bb6339bc4f3e80b9f58 +size 62788109 diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e9f4c649b429c679bfb26c27d7de757f79bfe93 --- /dev/null +++ b/checkpoint-30/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18fc6beca12122a3d4309af96449623e23a0d54f4b1c5088810cb8933a25809 +size 250681597 diff --git a/checkpoint-30/rng_state.pth b/checkpoint-30/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..03e811514791be839125b38c0162643843b81a6a --- /dev/null +++ b/checkpoint-30/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf64f9f720011db899c09efa51061d9067d83e6fc2235e5bb9d7087d72402fd +size 14575 diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f24f347cc22fffc6fc6e5780b42991023f54ce34 --- /dev/null +++ b/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298c19c2d0215ea63da7419132c9dac6c9c75fdd531e32dfc1cb87dbb0aa8259 +size 627 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9d45cbc591335acc0ae193b0b81103a0c8f47add --- /dev/null +++ b/checkpoint-30/trainer_state.json @@ -0,0 +1,223 @@ +{ + "best_metric": 2.046339750289917, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-30", + "epoch": 0.7754442649434572, + "eval_steps": 10, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 
1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 1.3777643892375552e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The 
following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.bin b/checkpoint-40/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6c257f607b69d5c1b527e6ebf965b1cf1bc3f4f --- /dev/null +++ b/checkpoint-40/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f5a629e5ff4db207707d71a1c324a97283a0a371cbd435fb090ea711a9e21c +size 62788109 diff --git a/checkpoint-40/adapter_model/README.md b/checkpoint-40/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-40/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-40/adapter_model/adapter_config.json b/checkpoint-40/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-40/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model/adapter_model.bin b/checkpoint-40/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6c257f607b69d5c1b527e6ebf965b1cf1bc3f4f --- /dev/null +++ b/checkpoint-40/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:12f5a629e5ff4db207707d71a1c324a97283a0a371cbd435fb090ea711a9e21c +size 62788109 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e771a88866d3ad318eaae2e8fdd8e84fce4c1e2 --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c0a407654387b576e7ec5d641bcd7961201b07a96d39e22e3e9d77e541a513 +size 250681597 diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..79140fcff6aa1dc0216e428bff9bc901d2a4ce7d --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef3dbc703c54e9cba3cb628e0e6ae4c7f37310ab3398f30c0c5503dcb749f77 +size 14575 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3810259f60773aafefb3d9d988e241d324ba4fd3 --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d048c63c660c9f2cb6345c4f756c8abee0e570482b06bf9c08827d79b257394 +size 627 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1f40efc586203ae9e7937918669e0177d6543aad --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,291 @@ +{ + "best_metric": 2.0354697704315186, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-40", + "epoch": 1.0339256865912763, + "eval_steps": 10, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + 
}, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + "step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 1.8296447921160192e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff 
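
Each `trainer_state.json` in this diff repeats the full `log_history` from step 1, so the most recent checkpoint is enough to recover the entire run. A small sketch that extracts the training and eval curves (the local file path is an assumption):

```python
import json

# Load the most recent trainer state; earlier checkpoints hold prefixes of it.
with open("checkpoint-40/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
print("eval curve:", evals)  # 2.0984 -> 2.0670 -> 2.0463 -> 2.0355 so far
```
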
--git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.bin b/checkpoint-50/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8dc5d21ed69f5464baad4f11f5ca6c588e9a3b8 --- /dev/null +++ b/checkpoint-50/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad4a9568f022e507126ea7e1f8f36a695ac0ff02c75b05285a571a3eae428d6 +size 62788109 diff --git a/checkpoint-50/adapter_model/README.md b/checkpoint-50/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-50/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-50/adapter_model/adapter_config.json b/checkpoint-50/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-50/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model/adapter_model.bin b/checkpoint-50/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8dc5d21ed69f5464baad4f11f5ca6c588e9a3b8 --- /dev/null +++ b/checkpoint-50/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad4a9568f022e507126ea7e1f8f36a695ac0ff02c75b05285a571a3eae428d6 +size 62788109 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e58467aaec4ac53b2ae92c0fc6071fe50e3a3025 --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d9f6218594c41c55f8dc0cd47a6eb49ed24438fddc4fe764e95b27863494020d +size 250681597 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e00cea4f25415c21cab2a709c02e9e0f5cfb6d5c --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:585434cf728ba17dfd089b29e20fd26f92ed2f8752cd52f32d800c2aad892a89 +size 14575 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c887bcc2ab93c7ec18b5c5550fa2fa62c08d6685 --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238db3d432ca0bc71d28453f24626baef9ff9f503195c9d58c104586d1a6c8a1 +size 627 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..560f0a40b7440382c98bee072ba7d0153bd93a7e --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,359 @@ +{ + "best_metric": 2.0276732444763184, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-50", + "epoch": 1.2924071082390953, + "eval_steps": 10, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + 
}, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + "step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 1.465, + "step": 41 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 1.4199, + "step": 42 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 1.5403, + "step": 43 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.4499, + "step": 44 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 1.5751, + "step": 45 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 1.4809, + "step": 46 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 1.5022, + "step": 47 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 1.4663, + "step": 48 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 1.4435, + "step": 49 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 1.4246, + "step": 50 + }, + { + "epoch": 1.29, + "eval_loss": 2.0276732444763184, + "eval_runtime": 119.5811, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 50 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 2.2849403511373824e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 
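
Note that none of the `.bin`/`.pt` entries in this diff contain actual weights: each is a three-line Git LFS pointer (spec version, sha256 `oid`, byte `size`). After `git lfs pull` fetches the real objects, a pointer can be checked against its file with a sketch like this (paths are assumed local):

```python
import hashlib
from pathlib import Path

def verify_lfs_object(pointer_path: str, object_path: str) -> bool:
    """Compare a Git LFS pointer's sha256 oid and size to the fetched file."""
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    expected_size = int(fields["size"])
    data = Path(object_path).read_bytes()
    return (
        hashlib.sha256(data).hexdigest() == expected_oid
        and len(data) == expected_size
    )
```
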
0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.bin b/checkpoint-60/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e54e4e0d2f0c0cc73ba5b0f8ec131c3c2c7279f --- /dev/null +++ b/checkpoint-60/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b327deb5fedf5ccbe9c1384b4cdb4dc01776fa03dbeb30a2214f6841f4567748 +size 62788109 diff --git a/checkpoint-60/adapter_model/README.md b/checkpoint-60/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-60/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-60/adapter_model/adapter_config.json b/checkpoint-60/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-60/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model/adapter_model.bin b/checkpoint-60/adapter_model/adapter_model.bin 
new file mode 100644 index 0000000000000000000000000000000000000000..0e54e4e0d2f0c0cc73ba5b0f8ec131c3c2c7279f --- /dev/null +++ b/checkpoint-60/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b327deb5fedf5ccbe9c1384b4cdb4dc01776fa03dbeb30a2214f6841f4567748 +size 62788109 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7dd54d2c03fff9154cd90e8e69dee7f0e42040e --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a2f445991ad6bdd4b0ebd91951705c1928028526a6440f6ff2bff79755e40d1 +size 250681597 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e9e18517c70f5dae72227d3defcf55c223ca9380 --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62eb8acf4d5a8ebe978a6a10f79a959de2197a382345645c4fdf0dea05b326da +size 14575 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2425e63d70352eccd242f7b31acd4a4caeae8219 --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec60983a98e8b83150f00f3aa55cd737ceaf31e404f45b98b313a2022906fb4 +size 627 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0993fdf64cb71bfe2178e5fad7d4e84393be09 --- /dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,427 @@ +{ + "best_metric": 2.0228564739227295, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-60", + "epoch": 1.5508885298869144, + "eval_steps": 10, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, 
+ "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + "step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 1.465, + "step": 41 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 1.4199, + "step": 42 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 1.5403, + "step": 43 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.4499, + "step": 44 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 1.5751, + "step": 45 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 1.4809, + "step": 46 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 1.5022, + "step": 47 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 1.4663, + "step": 48 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 1.4435, + "step": 49 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 1.4246, + "step": 50 + }, + { + "epoch": 1.29, + "eval_loss": 2.0276732444763184, + "eval_runtime": 119.5811, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 50 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 1.4877, + "step": 51 + }, + { + "epoch": 
1.34, + "learning_rate": 5e-05, + "loss": 1.4066, + "step": 52 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 1.3559, + "step": 53 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 1.5591, + "step": 54 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 1.4942, + "step": 55 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 1.4685, + "step": 56 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 1.4165, + "step": 57 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 1.3995, + "step": 58 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 1.3931, + "step": 59 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 1.4234, + "step": 60 + }, + { + "epoch": 1.55, + "eval_loss": 2.0228564739227295, + "eval_runtime": 119.5715, + "eval_samples_per_second": 1.556, + "eval_steps_per_second": 0.778, + "step": 60 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 2.7433999518793728e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-70/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-70/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70/adapter_model.bin b/checkpoint-70/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..38e26ac92914f3d9564f14fd978ad5e921b440fa --- /dev/null +++ b/checkpoint-70/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a7e75898b7a7dd2d629293916f814af1562c4efdb5a70d04ae946f12b0bb33 +size 62788109 diff --git a/checkpoint-70/adapter_model/README.md b/checkpoint-70/adapter_model/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-70/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-70/adapter_model/adapter_config.json b/checkpoint-70/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-70/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70/adapter_model/adapter_model.bin b/checkpoint-70/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..38e26ac92914f3d9564f14fd978ad5e921b440fa --- /dev/null +++ b/checkpoint-70/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a7e75898b7a7dd2d629293916f814af1562c4efdb5a70d04ae946f12b0bb33 +size 62788109 diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..58fb41cb8141f937dfb28c6e141983d0f18e6d0c --- /dev/null +++ b/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac90a2b24be0197a3df0f5297600340ef66e29ea745dfe64ec949a6535612a33 +size 250681597 diff --git a/checkpoint-70/rng_state.pth b/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..04ab6fd12ed4050522c177c1b9c48b853478b278 --- /dev/null +++ b/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac73dfa960e69adad05f49630686f59d1ae4a2402121d980c5716dfea26fa8a +size 14575 diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d450881a8ac82a0d1930697b19b8271841dfc3a3 --- /dev/null +++ b/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8071d10c923b8842261293fddda48a72a72ada30d3518ad651a06114395d264b +size 627 diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3f4033849b6bf8eea6427977b9c178ee11b576d7 --- /dev/null +++ b/checkpoint-70/trainer_state.json @@ -0,0 +1,495 @@ +{ + "best_metric": 2.0199856758117676, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-70", + "epoch": 1.8093699515347335, + "eval_steps": 10, + "global_step": 70, + "is_hyper_param_search": false, + 
"is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + 
"step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 1.465, + "step": 41 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 1.4199, + "step": 42 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 1.5403, + "step": 43 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.4499, + "step": 44 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 1.5751, + "step": 45 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 1.4809, + "step": 46 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 1.5022, + "step": 47 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 1.4663, + "step": 48 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 1.4435, + "step": 49 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 1.4246, + "step": 50 + }, + { + "epoch": 1.29, + "eval_loss": 2.0276732444763184, + "eval_runtime": 119.5811, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 50 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 1.4877, + "step": 51 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 1.4066, + "step": 52 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 1.3559, + "step": 53 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 1.5591, + "step": 54 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 1.4942, + "step": 55 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 1.4685, + "step": 56 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 1.4165, + "step": 57 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 1.3995, + "step": 58 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 1.3931, + "step": 59 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 1.4234, + "step": 60 + }, + { + "epoch": 1.55, + "eval_loss": 2.0228564739227295, + "eval_runtime": 119.5715, + "eval_samples_per_second": 1.556, + "eval_steps_per_second": 0.778, + "step": 60 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 1.4732, + "step": 61 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 1.4349, + "step": 62 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 1.4548, + "step": 63 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 1.48, + "step": 64 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 1.3789, + "step": 65 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 1.3915, + "step": 66 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 1.3789, + "step": 67 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 1.5206, + "step": 68 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 1.4851, + "step": 69 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 1.5251, + "step": 70 + }, + { + "epoch": 1.81, + "eval_loss": 2.0199856758117676, + "eval_runtime": 119.5994, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 70 + } + ], + "logging_steps": 1, + 
"max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 3.2059276062621696e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,38 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true, + "use_fast": true +}