diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..363fcab7ed6e9634e198cf5555ceb88932c9a245 --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..56ed1c5c7239e2dc207025810bfa1eba25ee586d --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "127668ee-44f0-4438-9337-e7c4a486aea3", + "metadata": {}, + "outputs": [], + "source": [ + "from huggingface_hub import HfApi\n", + "\n", + "api = HfApi()\n", + "\n", + "# Upload all the content from the local folder to your remote Space.\n", + "# By default, files are uploaded at the root of the repo\n", + "\n", + "api.upload_folder(\n", + "\n", + " folder_path=r\"C:\\dataset\\New folder\",\n", + "\n", + " repo_id=\"MinervaAI/Random-roleplay-instruction\",\n", + "\n", + " repo_type=\"dataset\",\n", + "\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7646c3b0f63338a5c87f9f689809311fb57d15 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": false, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..9c16aa4be022f03ad001b006fba14dfb73a1929c --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,3 @@ +{ + "": 32000 +} diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5a1a8e25fab70af1859fa3c79fcd46ce9c8ff92f --- /dev/null +++ b/checkpoint-10/README.md @@ -0,0 +1,34 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- 
llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + +- PEFT 0.6.0.dev0 + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-10/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-10/adapter_model.bin b/checkpoint-10/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad502817425d5cbd7f2db7c5f80f3d34e4aa3c09 --- /dev/null +++ b/checkpoint-10/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d73dd5f0590e7395a03feab2c12262e8144fdb10f84d6cc08f5c2b521c7d832 +size 62788109 diff --git a/checkpoint-10/adapter_model/README.md b/checkpoint-10/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5a1a8e25fab70af1859fa3c79fcd46ce9c8ff92f --- /dev/null +++ b/checkpoint-10/adapter_model/README.md @@ -0,0 +1,34 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + +- PEFT 0.6.0.dev0 + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-10/adapter_model/adapter_config.json b/checkpoint-10/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-10/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-10/adapter_model/adapter_model.bin b/checkpoint-10/adapter_model/adapter_model.bin new file mode 100644 index 
0000000000000000000000000000000000000000..ad502817425d5cbd7f2db7c5f80f3d34e4aa3c09 --- /dev/null +++ b/checkpoint-10/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d73dd5f0590e7395a03feab2c12262e8144fdb10f84d6cc08f5c2b521c7d832 +size 62788109 diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d3b14bc6d88e74a9c278965db08c3f4531c8856 --- /dev/null +++ b/checkpoint-10/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fc15492ebf7acd0d7da2effdc336db5cf94da3d033e4bfcba017a5ba9ff578 +size 250681597 diff --git a/checkpoint-10/rng_state.pth b/checkpoint-10/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3941b167aeed93112d0719c958b58e72f2ec8bd2 --- /dev/null +++ b/checkpoint-10/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d0d39ee3f495298b1364d1a694b15161510d7f6b4a57e4ad295a4590655a19 +size 14575 diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..19b0f5c3b6b112717270d933dc00cea683e16262 --- /dev/null +++ b/checkpoint-10/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f23bc239cbe3e7a4f862f2448a71aa49fc3b361c6c5866cc00242a779c0be4 +size 627 diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5cd9682d5a75a3cf56fb5989028c9dc62ed1d503 --- /dev/null +++ b/checkpoint-10/trainer_state.json @@ -0,0 +1,87 @@ +{ + "best_metric": 2.098437547683716, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-10", + "epoch": 0.25848142164781907, + "eval_steps": 10, + "global_step": 10, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 4.582587092041728e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-10/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff 
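
At this point checkpoint-10 is complete: a README recording the bitsandbytes settings, the LoRA `adapter_config.json`, LFS pointers for the adapter and optimizer, `trainer_state.json`, and `training_args.bin`. For reference, a minimal sketch of loading the `NousResearch/Llama-2-13b-hf` base model with the same quantization the README records; the `BitsAndBytesConfig` fields mirror the README, while `device_map="auto"` is an assumption, not something the diff specifies:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirror the quantization config recorded in checkpoint-10/README.md:
# 4-bit NF4 quantization, double quantization, bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-13b-hf",
    quantization_config=bnb_config,
    device_map="auto",  # assumption: let accelerate choose device placement
)
```
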
--git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.bin b/checkpoint-20/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7615732f2ddc126ab3490324549af7e959adc804 --- /dev/null +++ b/checkpoint-20/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d0c38222a997a52bfe76857917321fb2bc8d10553b4da4ff8390643c25f962 +size 62788109 diff --git a/checkpoint-20/adapter_model/README.md b/checkpoint-20/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-20/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-20/adapter_model/adapter_config.json b/checkpoint-20/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-20/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git 
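
Every checkpoint carries the same `adapter_config.json`: LoRA with r=8, alpha=16, dropout 0.05, targeting all seven Llama-2 projection matrices. A hedged sketch of attaching one of these adapters to the 4-bit base model from the sketch above, assuming the checkpoint directory has been downloaded locally:

```python
from peft import PeftModel

# Attach the checkpoint-20 LoRA adapter to the quantized base model.
# "checkpoint-20" is assumed to be a local copy of the directory in this
# diff; adapter_config.json tells PEFT how adapter_model.bin maps onto
# the base model's q/k/v/o/gate/up/down projections.
model = PeftModel.from_pretrained(base_model, "checkpoint-20")
model.eval()  # the saved config has inference_mode: true
```
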
a/checkpoint-20/adapter_model/adapter_model.bin b/checkpoint-20/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7615732f2ddc126ab3490324549af7e959adc804 --- /dev/null +++ b/checkpoint-20/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d0c38222a997a52bfe76857917321fb2bc8d10553b4da4ff8390643c25f962 +size 62788109 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a7f282f053f06ee4b0703c9ff8634995fe830bf --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56e8c6c5c1d0e07913575e95580988f8c4d4d451e12e22619c79c5df6b48dc53 +size 250681597 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f314eb61d07ca624a4eb51cab0bec85c72aab6d1 --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf66b0a499885f833314015600150cb6bfa74e1505e5608890c7c4ba655a6ba +size 14575 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75b0bacc84680716c87f31c95924d0e103e050af --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64bd2871a807ae2da11c5073d611b4d4223c336499ef21e9e856aaa448a1a35 +size 627 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f37a8a2dd39940b76a609b63a172eaff92ff7b55 --- /dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,155 @@ +{ + "best_metric": 2.066981077194214, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-20", + "epoch": 0.5169628432956381, + "eval_steps": 10, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + 
"learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 9.195307914756096e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-30/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-30/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model.bin b/checkpoint-30/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..60e8958adfb7d66a5df857f7f3279b8f60779563 --- /dev/null +++ b/checkpoint-30/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4276aa46f33d393edc2d19308e186a1f07580d76eddb0bb6339bc4f3e80b9f58 +size 62788109 diff --git a/checkpoint-30/adapter_model/README.md b/checkpoint-30/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-30/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- 
llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-30/adapter_model/adapter_config.json b/checkpoint-30/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-30/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model/adapter_model.bin b/checkpoint-30/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..60e8958adfb7d66a5df857f7f3279b8f60779563 --- /dev/null +++ b/checkpoint-30/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4276aa46f33d393edc2d19308e186a1f07580d76eddb0bb6339bc4f3e80b9f58 +size 62788109 diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e9f4c649b429c679bfb26c27d7de757f79bfe93 --- /dev/null +++ b/checkpoint-30/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18fc6beca12122a3d4309af96449623e23a0d54f4b1c5088810cb8933a25809 +size 250681597 diff --git a/checkpoint-30/rng_state.pth b/checkpoint-30/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..03e811514791be839125b38c0162643843b81a6a --- /dev/null +++ b/checkpoint-30/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf64f9f720011db899c09efa51061d9067d83e6fc2235e5bb9d7087d72402fd +size 14575 diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f24f347cc22fffc6fc6e5780b42991023f54ce34 --- /dev/null +++ b/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298c19c2d0215ea63da7419132c9dac6c9c75fdd531e32dfc1cb87dbb0aa8259 +size 627 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9d45cbc591335acc0ae193b0b81103a0c8f47add --- /dev/null +++ b/checkpoint-30/trainer_state.json @@ -0,0 +1,223 @@ +{ + "best_metric": 2.046339750289917, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-30", + "epoch": 0.7754442649434572, + "eval_steps": 10, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 
1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 1.3777643892375552e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The 
following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.bin b/checkpoint-40/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6c257f607b69d5c1b527e6ebf965b1cf1bc3f4f --- /dev/null +++ b/checkpoint-40/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f5a629e5ff4db207707d71a1c324a97283a0a371cbd435fb090ea711a9e21c +size 62788109 diff --git a/checkpoint-40/adapter_model/README.md b/checkpoint-40/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-40/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-40/adapter_model/adapter_config.json b/checkpoint-40/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-40/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model/adapter_model.bin b/checkpoint-40/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6c257f607b69d5c1b527e6ebf965b1cf1bc3f4f --- /dev/null +++ b/checkpoint-40/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:12f5a629e5ff4db207707d71a1c324a97283a0a371cbd435fb090ea711a9e21c +size 62788109 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e771a88866d3ad318eaae2e8fdd8e84fce4c1e2 --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c0a407654387b576e7ec5d641bcd7961201b07a96d39e22e3e9d77e541a513 +size 250681597 diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..79140fcff6aa1dc0216e428bff9bc901d2a4ce7d --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef3dbc703c54e9cba3cb628e0e6ae4c7f37310ab3398f30c0c5503dcb749f77 +size 14575 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3810259f60773aafefb3d9d988e241d324ba4fd3 --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d048c63c660c9f2cb6345c4f756c8abee0e570482b06bf9c08827d79b257394 +size 627 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1f40efc586203ae9e7937918669e0177d6543aad --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,291 @@ +{ + "best_metric": 2.0354697704315186, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-40", + "epoch": 1.0339256865912763, + "eval_steps": 10, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + 
}, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + "step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 1.8296447921160192e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff 
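
Each `trainer_state.json` in this diff repeats the full `log_history` from step 1, so the most recent checkpoint is enough to recover the entire run. A small sketch that extracts the training and eval curves (the local file path is an assumption):

```python
import json

# Load the most recent trainer state; earlier checkpoints hold prefixes of it.
with open("checkpoint-40/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
print("eval curve:", evals)  # 2.0984 -> 2.0670 -> 2.0463 -> 2.0355 so far
```
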
--git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.bin b/checkpoint-50/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8dc5d21ed69f5464baad4f11f5ca6c588e9a3b8 --- /dev/null +++ b/checkpoint-50/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad4a9568f022e507126ea7e1f8f36a695ac0ff02c75b05285a571a3eae428d6 +size 62788109 diff --git a/checkpoint-50/adapter_model/README.md b/checkpoint-50/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-50/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-50/adapter_model/adapter_config.json b/checkpoint-50/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-50/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model/adapter_model.bin b/checkpoint-50/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8dc5d21ed69f5464baad4f11f5ca6c588e9a3b8 --- /dev/null +++ b/checkpoint-50/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad4a9568f022e507126ea7e1f8f36a695ac0ff02c75b05285a571a3eae428d6 +size 62788109 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e58467aaec4ac53b2ae92c0fc6071fe50e3a3025 --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d9f6218594c41c55f8dc0cd47a6eb49ed24438fddc4fe764e95b27863494020d +size 250681597 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e00cea4f25415c21cab2a709c02e9e0f5cfb6d5c --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:585434cf728ba17dfd089b29e20fd26f92ed2f8752cd52f32d800c2aad892a89 +size 14575 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c887bcc2ab93c7ec18b5c5550fa2fa62c08d6685 --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238db3d432ca0bc71d28453f24626baef9ff9f503195c9d58c104586d1a6c8a1 +size 627 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..560f0a40b7440382c98bee072ba7d0153bd93a7e --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,359 @@ +{ + "best_metric": 2.0276732444763184, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-50", + "epoch": 1.2924071082390953, + "eval_steps": 10, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + 
}, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + "step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 1.465, + "step": 41 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 1.4199, + "step": 42 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 1.5403, + "step": 43 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.4499, + "step": 44 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 1.5751, + "step": 45 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 1.4809, + "step": 46 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 1.5022, + "step": 47 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 1.4663, + "step": 48 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 1.4435, + "step": 49 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 1.4246, + "step": 50 + }, + { + "epoch": 1.29, + "eval_loss": 2.0276732444763184, + "eval_runtime": 119.5811, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 50 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 2.2849403511373824e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 
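
Note that none of the `.bin`/`.pt` entries in this diff contain actual weights: each is a three-line Git LFS pointer (spec version, sha256 `oid`, byte `size`). After `git lfs pull` fetches the real objects, a pointer can be checked against its file with a sketch like this (paths are assumed local):

```python
import hashlib
from pathlib import Path

def verify_lfs_object(pointer_path: str, object_path: str) -> bool:
    """Compare a Git LFS pointer's sha256 oid and size to the fetched file."""
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    expected_size = int(fields["size"])
    data = Path(object_path).read_bytes()
    return (
        hashlib.sha256(data).hexdigest() == expected_oid
        and len(data) == expected_size
    )
```
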
0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.bin b/checkpoint-60/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e54e4e0d2f0c0cc73ba5b0f8ec131c3c2c7279f --- /dev/null +++ b/checkpoint-60/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b327deb5fedf5ccbe9c1384b4cdb4dc01776fa03dbeb30a2214f6841f4567748 +size 62788109 diff --git a/checkpoint-60/adapter_model/README.md b/checkpoint-60/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-60/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-60/adapter_model/adapter_config.json b/checkpoint-60/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-60/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model/adapter_model.bin b/checkpoint-60/adapter_model/adapter_model.bin 
new file mode 100644 index 0000000000000000000000000000000000000000..0e54e4e0d2f0c0cc73ba5b0f8ec131c3c2c7279f --- /dev/null +++ b/checkpoint-60/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b327deb5fedf5ccbe9c1384b4cdb4dc01776fa03dbeb30a2214f6841f4567748 +size 62788109 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7dd54d2c03fff9154cd90e8e69dee7f0e42040e --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a2f445991ad6bdd4b0ebd91951705c1928028526a6440f6ff2bff79755e40d1 +size 250681597 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e9e18517c70f5dae72227d3defcf55c223ca9380 --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62eb8acf4d5a8ebe978a6a10f79a959de2197a382345645c4fdf0dea05b326da +size 14575 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2425e63d70352eccd242f7b31acd4a4caeae8219 --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec60983a98e8b83150f00f3aa55cd737ceaf31e404f45b98b313a2022906fb4 +size 627 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0993fdf64cb71bfe2178e5fad7d4e84393be09 --- /dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,427 @@ +{ + "best_metric": 2.0228564739227295, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-60", + "epoch": 1.5508885298869144, + "eval_steps": 10, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, 
+ "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + "step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 1.465, + "step": 41 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 1.4199, + "step": 42 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 1.5403, + "step": 43 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.4499, + "step": 44 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 1.5751, + "step": 45 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 1.4809, + "step": 46 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 1.5022, + "step": 47 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 1.4663, + "step": 48 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 1.4435, + "step": 49 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 1.4246, + "step": 50 + }, + { + "epoch": 1.29, + "eval_loss": 2.0276732444763184, + "eval_runtime": 119.5811, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 50 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 1.4877, + "step": 51 + }, + { + "epoch": 
1.34, + "learning_rate": 5e-05, + "loss": 1.4066, + "step": 52 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 1.3559, + "step": 53 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 1.5591, + "step": 54 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 1.4942, + "step": 55 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 1.4685, + "step": 56 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 1.4165, + "step": 57 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 1.3995, + "step": 58 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 1.3931, + "step": 59 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 1.4234, + "step": 60 + }, + { + "epoch": 1.55, + "eval_loss": 2.0228564739227295, + "eval_runtime": 119.5715, + "eval_samples_per_second": 1.556, + "eval_steps_per_second": 0.778, + "step": 60 + } + ], + "logging_steps": 1, + "max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 2.7433999518793728e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-70/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-70/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70/adapter_model.bin b/checkpoint-70/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..38e26ac92914f3d9564f14fd978ad5e921b440fa --- /dev/null +++ b/checkpoint-70/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a7e75898b7a7dd2d629293916f814af1562c4efdb5a70d04ae946f12b0bb33 +size 62788109 diff --git a/checkpoint-70/adapter_model/README.md b/checkpoint-70/adapter_model/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-70/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-70/adapter_model/adapter_config.json b/checkpoint-70/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2418ea050412b02e7dad46126b4232d268ad2a --- /dev/null +++ b/checkpoint-70/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70/adapter_model/adapter_model.bin b/checkpoint-70/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..38e26ac92914f3d9564f14fd978ad5e921b440fa --- /dev/null +++ b/checkpoint-70/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a7e75898b7a7dd2d629293916f814af1562c4efdb5a70d04ae946f12b0bb33 +size 62788109 diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..58fb41cb8141f937dfb28c6e141983d0f18e6d0c --- /dev/null +++ b/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac90a2b24be0197a3df0f5297600340ef66e29ea745dfe64ec949a6535612a33 +size 250681597 diff --git a/checkpoint-70/rng_state.pth b/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..04ab6fd12ed4050522c177c1b9c48b853478b278 --- /dev/null +++ b/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac73dfa960e69adad05f49630686f59d1ae4a2402121d980c5716dfea26fa8a +size 14575 diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d450881a8ac82a0d1930697b19b8271841dfc3a3 --- /dev/null +++ b/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8071d10c923b8842261293fddda48a72a72ada30d3518ad651a06114395d264b +size 627 diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3f4033849b6bf8eea6427977b9c178ee11b576d7 --- /dev/null +++ b/checkpoint-70/trainer_state.json @@ -0,0 +1,495 @@ +{ + "best_metric": 2.0199856758117676, + "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-70", + "epoch": 1.8093699515347335, + "eval_steps": 10, + "global_step": 70, + "is_hyper_param_search": false, + 
"is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5707, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.5621, + "step": 2 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 1.4812, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 4e-05, + "loss": 1.5197, + "step": 4 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 1.5567, + "step": 5 + }, + { + "epoch": 0.16, + "learning_rate": 5e-05, + "loss": 1.4645, + "step": 6 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.6122, + "step": 7 + }, + { + "epoch": 0.21, + "learning_rate": 5e-05, + "loss": 1.5596, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.5608, + "step": 9 + }, + { + "epoch": 0.26, + "learning_rate": 5e-05, + "loss": 1.5456, + "step": 10 + }, + { + "epoch": 0.26, + "eval_loss": 2.098437547683716, + "eval_runtime": 119.6161, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.777, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 5e-05, + "loss": 1.5645, + "step": 11 + }, + { + "epoch": 0.31, + "learning_rate": 5e-05, + "loss": 1.538, + "step": 12 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6388, + "step": 13 + }, + { + "epoch": 0.36, + "learning_rate": 5e-05, + "loss": 1.4943, + "step": 14 + }, + { + "epoch": 0.39, + "learning_rate": 5e-05, + "loss": 1.5469, + "step": 15 + }, + { + "epoch": 0.41, + "learning_rate": 5e-05, + "loss": 1.6149, + "step": 16 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.5345, + "step": 17 + }, + { + "epoch": 0.47, + "learning_rate": 5e-05, + "loss": 1.4903, + "step": 18 + }, + { + "epoch": 0.49, + "learning_rate": 5e-05, + "loss": 1.5499, + "step": 19 + }, + { + "epoch": 0.52, + "learning_rate": 5e-05, + "loss": 1.5934, + "step": 20 + }, + { + "epoch": 0.52, + "eval_loss": 2.066981077194214, + "eval_runtime": 119.5781, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 20 + }, + { + "epoch": 0.54, + "learning_rate": 5e-05, + "loss": 1.4554, + "step": 21 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5512, + "step": 22 + }, + { + "epoch": 0.59, + "learning_rate": 5e-05, + "loss": 1.4636, + "step": 23 + }, + { + "epoch": 0.62, + "learning_rate": 5e-05, + "loss": 1.5398, + "step": 24 + }, + { + "epoch": 0.65, + "learning_rate": 5e-05, + "loss": 1.5623, + "step": 25 + }, + { + "epoch": 0.67, + "learning_rate": 5e-05, + "loss": 1.4658, + "step": 26 + }, + { + "epoch": 0.7, + "learning_rate": 5e-05, + "loss": 1.4723, + "step": 27 + }, + { + "epoch": 0.72, + "learning_rate": 5e-05, + "loss": 1.432, + "step": 28 + }, + { + "epoch": 0.75, + "learning_rate": 5e-05, + "loss": 1.4814, + "step": 29 + }, + { + "epoch": 0.78, + "learning_rate": 5e-05, + "loss": 1.4924, + "step": 30 + }, + { + "epoch": 0.78, + "eval_loss": 2.046339750289917, + "eval_runtime": 119.5771, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 30 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.5809, + "step": 31 + }, + { + "epoch": 0.83, + "learning_rate": 5e-05, + "loss": 1.4803, + "step": 32 + }, + { + "epoch": 0.85, + "learning_rate": 5e-05, + "loss": 1.4878, + "step": 33 + }, + { + "epoch": 0.88, + "learning_rate": 5e-05, + "loss": 1.3871, + "step": 34 + }, + { + "epoch": 0.9, + "learning_rate": 5e-05, + "loss": 1.5151, + "step": 35 + }, + { + "epoch": 0.93, + "learning_rate": 5e-05, + "loss": 1.4212, + 
"step": 36 + }, + { + "epoch": 0.96, + "learning_rate": 5e-05, + "loss": 1.6284, + "step": 37 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 1.5002, + "step": 38 + }, + { + "epoch": 1.01, + "learning_rate": 5e-05, + "loss": 1.4452, + "step": 39 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4399, + "step": 40 + }, + { + "epoch": 1.03, + "eval_loss": 2.0354697704315186, + "eval_runtime": 119.5875, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 40 + }, + { + "epoch": 1.06, + "learning_rate": 5e-05, + "loss": 1.465, + "step": 41 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 1.4199, + "step": 42 + }, + { + "epoch": 1.11, + "learning_rate": 5e-05, + "loss": 1.5403, + "step": 43 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.4499, + "step": 44 + }, + { + "epoch": 1.16, + "learning_rate": 5e-05, + "loss": 1.5751, + "step": 45 + }, + { + "epoch": 1.19, + "learning_rate": 5e-05, + "loss": 1.4809, + "step": 46 + }, + { + "epoch": 1.21, + "learning_rate": 5e-05, + "loss": 1.5022, + "step": 47 + }, + { + "epoch": 1.24, + "learning_rate": 5e-05, + "loss": 1.4663, + "step": 48 + }, + { + "epoch": 1.27, + "learning_rate": 5e-05, + "loss": 1.4435, + "step": 49 + }, + { + "epoch": 1.29, + "learning_rate": 5e-05, + "loss": 1.4246, + "step": 50 + }, + { + "epoch": 1.29, + "eval_loss": 2.0276732444763184, + "eval_runtime": 119.5811, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 50 + }, + { + "epoch": 1.32, + "learning_rate": 5e-05, + "loss": 1.4877, + "step": 51 + }, + { + "epoch": 1.34, + "learning_rate": 5e-05, + "loss": 1.4066, + "step": 52 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 1.3559, + "step": 53 + }, + { + "epoch": 1.4, + "learning_rate": 5e-05, + "loss": 1.5591, + "step": 54 + }, + { + "epoch": 1.42, + "learning_rate": 5e-05, + "loss": 1.4942, + "step": 55 + }, + { + "epoch": 1.45, + "learning_rate": 5e-05, + "loss": 1.4685, + "step": 56 + }, + { + "epoch": 1.47, + "learning_rate": 5e-05, + "loss": 1.4165, + "step": 57 + }, + { + "epoch": 1.5, + "learning_rate": 5e-05, + "loss": 1.3995, + "step": 58 + }, + { + "epoch": 1.53, + "learning_rate": 5e-05, + "loss": 1.3931, + "step": 59 + }, + { + "epoch": 1.55, + "learning_rate": 5e-05, + "loss": 1.4234, + "step": 60 + }, + { + "epoch": 1.55, + "eval_loss": 2.0228564739227295, + "eval_runtime": 119.5715, + "eval_samples_per_second": 1.556, + "eval_steps_per_second": 0.778, + "step": 60 + }, + { + "epoch": 1.58, + "learning_rate": 5e-05, + "loss": 1.4732, + "step": 61 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 1.4349, + "step": 62 + }, + { + "epoch": 1.63, + "learning_rate": 5e-05, + "loss": 1.4548, + "step": 63 + }, + { + "epoch": 1.65, + "learning_rate": 5e-05, + "loss": 1.48, + "step": 64 + }, + { + "epoch": 1.68, + "learning_rate": 5e-05, + "loss": 1.3789, + "step": 65 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 1.3915, + "step": 66 + }, + { + "epoch": 1.73, + "learning_rate": 5e-05, + "loss": 1.3789, + "step": 67 + }, + { + "epoch": 1.76, + "learning_rate": 5e-05, + "loss": 1.5206, + "step": 68 + }, + { + "epoch": 1.78, + "learning_rate": 5e-05, + "loss": 1.4851, + "step": 69 + }, + { + "epoch": 1.81, + "learning_rate": 5e-05, + "loss": 1.5251, + "step": 70 + }, + { + "epoch": 1.81, + "eval_loss": 2.0199856758117676, + "eval_runtime": 119.5994, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 0.778, + "step": 70 + } + ], + "logging_steps": 1, + 
"max_steps": 114, + "num_train_epochs": 3, + "save_steps": 10, + "total_flos": 3.2059276062621696e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8846f68deb94b60cd596e766674727db1bceb44e --- /dev/null +++ b/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1 +size 4219 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,38 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true, + "use_fast": true +}