lmg-anon committed on Dec 11, 2023

Commit

86f9061

1 Parent(s): 49ef932

Upload 50 files

Browse files

Files changed (50) hide show

adapter_config.json +28 -0
adapter_model.bin +3 -0
checkpoint-120/README.md +220 -0
checkpoint-120/adapter_config.json +28 -0
checkpoint-120/adapter_model.safetensors +3 -0
checkpoint-120/optimizer.pt +3 -0
checkpoint-120/rng_state.pth +3 -0
checkpoint-120/scheduler.pt +3 -0
checkpoint-120/trainer_state.json +739 -0
checkpoint-120/training_args.bin +3 -0
checkpoint-190/README.md +220 -0
checkpoint-190/adapter_config.json +28 -0
checkpoint-190/adapter_model.safetensors +3 -0
checkpoint-190/optimizer.pt +3 -0
checkpoint-190/rng_state.pth +3 -0
checkpoint-190/scheduler.pt +3 -0
checkpoint-190/trainer_state.json +1159 -0
checkpoint-190/training_args.bin +3 -0
checkpoint-200/README.md +220 -0
checkpoint-200/adapter_config.json +28 -0
checkpoint-200/adapter_model.safetensors +3 -0
checkpoint-200/optimizer.pt +3 -0
checkpoint-200/rng_state.pth +3 -0
checkpoint-200/scheduler.pt +3 -0
checkpoint-200/trainer_state.json +1219 -0
checkpoint-200/training_args.bin +3 -0
checkpoint-40/README.md +220 -0
checkpoint-40/adapter_config.json +28 -0
checkpoint-40/adapter_model.safetensors +3 -0
checkpoint-40/optimizer.pt +3 -0
checkpoint-40/rng_state.pth +3 -0
checkpoint-40/scheduler.pt +3 -0
checkpoint-40/trainer_state.json +259 -0
checkpoint-40/training_args.bin +3 -0
checkpoint-80/README.md +220 -0
checkpoint-80/adapter_config.json +28 -0
checkpoint-80/adapter_model.safetensors +3 -0
checkpoint-80/optimizer.pt +3 -0
checkpoint-80/rng_state.pth +3 -0
checkpoint-80/scheduler.pt +3 -0
checkpoint-80/trainer_state.json +499 -0
checkpoint-80/training_args.bin +3 -0
checkpoint-90/README.md +220 -0
checkpoint-90/adapter_config.json +28 -0
checkpoint-90/adapter_model.safetensors +3 -0
checkpoint-90/optimizer.pt +3 -0
checkpoint-90/rng_state.pth +3 -0
checkpoint-90/scheduler.pt +3 -0
checkpoint-90/trainer_state.json +559 -0
checkpoint-90/training_args.bin +3 -0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f79513adcdbec489bcae5922568302801faa2dd8cb229d4f8a34888c07a6faab
+size 160069834

checkpoint-120/README.md ADDED Viewed

	@@ -0,0 +1,220 @@

+---
+library_name: peft
+base_model: unsloth/llama-2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.2

checkpoint-120/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-120/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b794f816309a7ae9395e4659e941c80ca8a91b6e0b09e526a868ba9f3142d890
+size 159967880

checkpoint-120/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2305266e1e6c759d565002fd6a4fc8d6a8c2c7f49e3864d55ce9feb771839611
+size 80630612

checkpoint-120/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203e9bfabd925cb4ec7129d24877156fcee87215187c35a867e358e56a9425a4
+size 14244

checkpoint-120/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65d56887428ca405cc745bbbbd7f519276e95c51982e722119e5f11f0b51d490
+size 1064

checkpoint-120/trainer_state.json ADDED Viewed

	@@ -0,0 +1,739 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.28776978417266186,
+  "eval_steps": 500,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00013,
+      "loss": 1.1241,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00026,
+      "loss": 1.0107,
+      "step": 2
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00039,
+      "loss": 1.1086,
+      "step": 3
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00052,
+      "loss": 1.0044,
+      "step": 4
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00065,
+      "loss": 1.0496,
+      "step": 5
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.0005933661039639299,
+      "loss": 1.0199,
+      "step": 6
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005493502655735357,
+      "loss": 1.0198,
+      "step": 7
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005138701197773616,
+      "loss": 0.969,
+      "step": 8
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004844813951249544,
+      "loss": 0.9383,
+      "step": 9
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004596194077712558,
+      "loss": 0.8776,
+      "step": 10
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004382299106011073,
+      "loss": 1.0173,
+      "step": 11
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004195731958391368,
+      "loss": 1.1173,
+      "step": 12
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004031128874149274,
+      "loss": 1.0876,
+      "step": 13
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0003884492980336779,
+      "loss": 1.0524,
+      "step": 14
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0003752776749732568,
+      "loss": 0.8953,
+      "step": 15
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00036336104634371584,
+      "loss": 1.1335,
+      "step": 16
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00035251199395531623,
+      "loss": 0.9837,
+      "step": 17
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00034258007985157445,
+      "loss": 0.9707,
+      "step": 18
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.0003334429644276751,
+      "loss": 0.9149,
+      "step": 19
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.000325,
+      "loss": 1.0043,
+      "step": 20
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00031716752370827323,
+      "loss": 1.001,
+      "step": 21
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00030987534150481746,
+      "loss": 1.0395,
+      "step": 22
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.000303064062678102,
+      "loss": 0.8718,
+      "step": 23
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029668305198196496,
+      "loss": 1.1114,
+      "step": 24
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029068883707497264,
+      "loss": 0.7765,
+      "step": 25
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0002850438562747845,
+      "loss": 0.9522,
+      "step": 26
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00027971546389275785,
+      "loss": 0.9588,
+      "step": 27
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00027467513278676785,
+      "loss": 1.0313,
+      "step": 28
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.0002698978095246549,
+      "loss": 0.9338,
+      "step": 29
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.000265361388801511,
+      "loss": 0.892,
+      "step": 30
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00026104628189331215,
+      "loss": 0.893,
+      "step": 31
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0002569350598886808,
+      "loss": 0.8983,
+      "step": 32
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00025301215685249496,
+      "loss": 0.9277,
+      "step": 33
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024926362137539537,
+      "loss": 0.8962,
+      "step": 34
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024567690745599767,
+      "loss": 0.9124,
+      "step": 35
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002422406975624772,
+      "loss": 0.9535,
+      "step": 36
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023894475218048754,
+      "loss": 0.9019,
+      "step": 37
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002357797812857538,
+      "loss": 1.024,
+      "step": 38
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023273733406281566,
+      "loss": 0.8549,
+      "step": 39
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.0002298097038856279,
+      "loss": 1.0489,
+      "step": 40
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022698984612511293,
+      "loss": 0.751,
+      "step": 41
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022427130678626507,
+      "loss": 0.834,
+      "step": 42
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022164816032790388,
+      "loss": 0.889,
+      "step": 43
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021911495530055366,
+      "loss": 1.0103,
+      "step": 44
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021666666666666666,
+      "loss": 0.8766,
+      "step": 45
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002142986538536308,
+      "loss": 0.8181,
+      "step": 46
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002120066237423687,
+      "loss": 0.8754,
+      "step": 47
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002097865979195684,
+      "loss": 0.9038,
+      "step": 48
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020763488362498048,
+      "loss": 0.8646,
+      "step": 49
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020554804791094464,
+      "loss": 0.8836,
+      "step": 50
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002035228946026736,
+      "loss": 0.9962,
+      "step": 51
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002015564437074637,
+      "loss": 0.8835,
+      "step": 52
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019964591297103414,
+      "loss": 0.9196,
+      "step": 53
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019778870132130996,
+      "loss": 0.8995,
+      "step": 54
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019598237397554634,
+      "loss": 1.0178,
+      "step": 55
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019422464901683895,
+      "loss": 0.9395,
+      "step": 56
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019251338527170498,
+      "loss": 0.9882,
+      "step": 57
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019084657134227863,
+      "loss": 0.9274,
+      "step": 58
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00018922231566536414,
+      "loss": 0.9517,
+      "step": 59
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0001876388374866284,
+      "loss": 0.865,
+      "step": 60
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018609445865200715,
+      "loss": 0.9314,
+      "step": 61
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018458759613029606,
+      "loss": 0.9224,
+      "step": 62
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018311675519117857,
+      "loss": 0.788,
+      "step": 63
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018168052317185792,
+      "loss": 0.9739,
+      "step": 64
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00018027756377319947,
+      "loss": 0.9419,
+      "step": 65
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.0001789066118330336,
+      "loss": 0.8772,
+      "step": 66
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017756646853014972,
+      "loss": 0.8707,
+      "step": 67
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017625599697765812,
+      "loss": 0.8089,
+      "step": 68
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017497411816890378,
+      "loss": 0.9303,
+      "step": 69
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017371980724307585,
+      "loss": 0.9161,
+      "step": 70
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017249209004113945,
+      "loss": 0.9064,
+      "step": 71
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017129003992578723,
+      "loss": 1.0988,
+      "step": 72
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00017011277484181944,
+      "loss": 0.9804,
+      "step": 73
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0001689594545957618,
+      "loss": 0.8382,
+      "step": 74
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016782927833565472,
+      "loss": 0.9632,
+      "step": 75
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016672148221383754,
+      "loss": 0.9494,
+      "step": 76
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016563533721722828,
+      "loss": 0.9253,
+      "step": 77
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001645701471510958,
+      "loss": 0.9143,
+      "step": 78
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016352524676365398,
+      "loss": 0.8907,
+      "step": 79
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001625,
+      "loss": 0.9748,
+      "step": 80
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016149379837498482,
+      "loss": 0.893,
+      "step": 81
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00016050605945555833,
+      "loss": 0.839,
+      "step": 82
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.0001595362254439902,
+      "loss": 0.9276,
+      "step": 83
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015858376185413662,
+      "loss": 0.8758,
+      "step": 84
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015764815627361642,
+      "loss": 0.9125,
+      "step": 85
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015672891720538393,
+      "loss": 0.955,
+      "step": 86
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015582557298274985,
+      "loss": 0.9104,
+      "step": 87
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015493767075240873,
+      "loss": 0.8861,
+      "step": 88
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0001540647755204926,
+      "loss": 0.9693,
+      "step": 89
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0001532064692570853,
+      "loss": 0.7245,
+      "step": 90
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.000152362350055011,
+      "loss": 0.7523,
+      "step": 91
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.000151532031339051,
+      "loss": 0.8522,
+      "step": 92
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.00015071514112205468,
+      "loss": 0.9273,
+      "step": 93
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0001499113213046938,
+      "loss": 1.0303,
+      "step": 94
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014912022701586513,
+      "loss": 0.9273,
+      "step": 95
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014834152599098248,
+      "loss": 0.9071,
+      "step": 96
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014757489798561242,
+      "loss": 0.954,
+      "step": 97
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014682003422210332,
+      "loss": 0.7897,
+      "step": 98
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014607663686703578,
+      "loss": 0.9045,
+      "step": 99
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014534441853748632,
+      "loss": 0.7919,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014462310183424506,
+      "loss": 0.7449,
+      "step": 101
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0001439124189002655,
+      "loss": 0.8953,
+      "step": 102
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0001432121110027503,
+      "loss": 0.974,
+      "step": 103
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014252192813739225,
+      "loss": 0.959,
+      "step": 104
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014184162865339505,
+      "loss": 0.8767,
+      "step": 105
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014117097889799755,
+      "loss": 0.9206,
+      "step": 106
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.000140509752879313,
+      "loss": 0.8096,
+      "step": 107
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013985773194637893,
+      "loss": 0.9726,
+      "step": 108
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013921470448538878,
+      "loss": 0.7764,
+      "step": 109
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013858046563114675,
+      "loss": 0.8414,
+      "step": 110
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.0001379548169928529,
+      "loss": 0.9365,
+      "step": 111
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013733756639338393,
+      "loss": 0.9857,
+      "step": 112
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013672852762129314,
+      "loss": 0.8209,
+      "step": 113
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013612752019480102,
+      "loss": 0.9954,
+      "step": 114
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0001355343691370986,
+      "loss": 0.9425,
+      "step": 115
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.00013494890476232745,
+      "loss": 0.9199,
+      "step": 116
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0001343709624716425,
+      "loss": 1.0011,
+      "step": 117
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.00013380038255880045,
+      "loss": 0.9335,
+      "step": 118
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.000133237010024753,
+      "loss": 1.0612,
+      "step": 119
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.0001326806944007555,
+      "loss": 0.8794,
+      "step": 120
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 417,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 3.891394643755008e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-120/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:482256ef65b59af088490f9e3870f1b868a90992cd6294900bd43d13ef410480
+size 4536

checkpoint-190/README.md ADDED Viewed

	@@ -0,0 +1,220 @@

+---
+library_name: peft
+base_model: unsloth/llama-2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.2

checkpoint-190/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-190/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b1782493354a771246d082dc93657ab22db0ff3ddd5df17582fee11d00146f9
+size 159967880

checkpoint-190/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57486841c0688ed8ffaa2b58024863dbb4ba387677b7d20b6d188a9d6d450e01
+size 80630612

checkpoint-190/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203e9bfabd925cb4ec7129d24877156fcee87215187c35a867e358e56a9425a4
+size 14244

checkpoint-190/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fdcbbcb76fd3462a8f7147c20ae6d636210dbe8343e20495ae55ada97cf0e38a
+size 1064

checkpoint-190/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1159 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.4556354916067146,
+  "eval_steps": 500,
+  "global_step": 190,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00013,
+      "loss": 1.1241,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00026,
+      "loss": 1.0107,
+      "step": 2
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00039,
+      "loss": 1.1086,
+      "step": 3
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00052,
+      "loss": 1.0044,
+      "step": 4
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00065,
+      "loss": 1.0496,
+      "step": 5
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.0005933661039639299,
+      "loss": 1.0199,
+      "step": 6
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005493502655735357,
+      "loss": 1.0198,
+      "step": 7
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005138701197773616,
+      "loss": 0.969,
+      "step": 8
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004844813951249544,
+      "loss": 0.9383,
+      "step": 9
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004596194077712558,
+      "loss": 0.8776,
+      "step": 10
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004382299106011073,
+      "loss": 1.0173,
+      "step": 11
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004195731958391368,
+      "loss": 1.1173,
+      "step": 12
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004031128874149274,
+      "loss": 1.0876,
+      "step": 13
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0003884492980336779,
+      "loss": 1.0524,
+      "step": 14
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0003752776749732568,
+      "loss": 0.8953,
+      "step": 15
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00036336104634371584,
+      "loss": 1.1335,
+      "step": 16
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00035251199395531623,
+      "loss": 0.9837,
+      "step": 17
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00034258007985157445,
+      "loss": 0.9707,
+      "step": 18
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.0003334429644276751,
+      "loss": 0.9149,
+      "step": 19
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.000325,
+      "loss": 1.0043,
+      "step": 20
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00031716752370827323,
+      "loss": 1.001,
+      "step": 21
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00030987534150481746,
+      "loss": 1.0395,
+      "step": 22
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.000303064062678102,
+      "loss": 0.8718,
+      "step": 23
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029668305198196496,
+      "loss": 1.1114,
+      "step": 24
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029068883707497264,
+      "loss": 0.7765,
+      "step": 25
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0002850438562747845,
+      "loss": 0.9522,
+      "step": 26
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00027971546389275785,
+      "loss": 0.9588,
+      "step": 27
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00027467513278676785,
+      "loss": 1.0313,
+      "step": 28
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.0002698978095246549,
+      "loss": 0.9338,
+      "step": 29
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.000265361388801511,
+      "loss": 0.892,
+      "step": 30
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00026104628189331215,
+      "loss": 0.893,
+      "step": 31
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0002569350598886808,
+      "loss": 0.8983,
+      "step": 32
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00025301215685249496,
+      "loss": 0.9277,
+      "step": 33
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024926362137539537,
+      "loss": 0.8962,
+      "step": 34
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024567690745599767,
+      "loss": 0.9124,
+      "step": 35
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002422406975624772,
+      "loss": 0.9535,
+      "step": 36
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023894475218048754,
+      "loss": 0.9019,
+      "step": 37
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002357797812857538,
+      "loss": 1.024,
+      "step": 38
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023273733406281566,
+      "loss": 0.8549,
+      "step": 39
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.0002298097038856279,
+      "loss": 1.0489,
+      "step": 40
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022698984612511293,
+      "loss": 0.751,
+      "step": 41
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022427130678626507,
+      "loss": 0.834,
+      "step": 42
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022164816032790388,
+      "loss": 0.889,
+      "step": 43
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021911495530055366,
+      "loss": 1.0103,
+      "step": 44
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021666666666666666,
+      "loss": 0.8766,
+      "step": 45
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002142986538536308,
+      "loss": 0.8181,
+      "step": 46
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002120066237423687,
+      "loss": 0.8754,
+      "step": 47
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002097865979195684,
+      "loss": 0.9038,
+      "step": 48
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020763488362498048,
+      "loss": 0.8646,
+      "step": 49
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020554804791094464,
+      "loss": 0.8836,
+      "step": 50
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002035228946026736,
+      "loss": 0.9962,
+      "step": 51
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002015564437074637,
+      "loss": 0.8835,
+      "step": 52
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019964591297103414,
+      "loss": 0.9196,
+      "step": 53
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019778870132130996,
+      "loss": 0.8995,
+      "step": 54
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019598237397554634,
+      "loss": 1.0178,
+      "step": 55
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019422464901683895,
+      "loss": 0.9395,
+      "step": 56
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019251338527170498,
+      "loss": 0.9882,
+      "step": 57
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019084657134227863,
+      "loss": 0.9274,
+      "step": 58
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00018922231566536414,
+      "loss": 0.9517,
+      "step": 59
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0001876388374866284,
+      "loss": 0.865,
+      "step": 60
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018609445865200715,
+      "loss": 0.9314,
+      "step": 61
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018458759613029606,
+      "loss": 0.9224,
+      "step": 62
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018311675519117857,
+      "loss": 0.788,
+      "step": 63
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018168052317185792,
+      "loss": 0.9739,
+      "step": 64
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00018027756377319947,
+      "loss": 0.9419,
+      "step": 65
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.0001789066118330336,
+      "loss": 0.8772,
+      "step": 66
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017756646853014972,
+      "loss": 0.8707,
+      "step": 67
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017625599697765812,
+      "loss": 0.8089,
+      "step": 68
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017497411816890378,
+      "loss": 0.9303,
+      "step": 69
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017371980724307585,
+      "loss": 0.9161,
+      "step": 70
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017249209004113945,
+      "loss": 0.9064,
+      "step": 71
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017129003992578723,
+      "loss": 1.0988,
+      "step": 72
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00017011277484181944,
+      "loss": 0.9804,
+      "step": 73
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0001689594545957618,
+      "loss": 0.8382,
+      "step": 74
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016782927833565472,
+      "loss": 0.9632,
+      "step": 75
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016672148221383754,
+      "loss": 0.9494,
+      "step": 76
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016563533721722828,
+      "loss": 0.9253,
+      "step": 77
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001645701471510958,
+      "loss": 0.9143,
+      "step": 78
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016352524676365398,
+      "loss": 0.8907,
+      "step": 79
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001625,
+      "loss": 0.9748,
+      "step": 80
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016149379837498482,
+      "loss": 0.893,
+      "step": 81
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00016050605945555833,
+      "loss": 0.839,
+      "step": 82
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.0001595362254439902,
+      "loss": 0.9276,
+      "step": 83
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015858376185413662,
+      "loss": 0.8758,
+      "step": 84
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015764815627361642,
+      "loss": 0.9125,
+      "step": 85
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015672891720538393,
+      "loss": 0.955,
+      "step": 86
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015582557298274985,
+      "loss": 0.9104,
+      "step": 87
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015493767075240873,
+      "loss": 0.8861,
+      "step": 88
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0001540647755204926,
+      "loss": 0.9693,
+      "step": 89
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0001532064692570853,
+      "loss": 0.7245,
+      "step": 90
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.000152362350055011,
+      "loss": 0.7523,
+      "step": 91
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.000151532031339051,
+      "loss": 0.8522,
+      "step": 92
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.00015071514112205468,
+      "loss": 0.9273,
+      "step": 93
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0001499113213046938,
+      "loss": 1.0303,
+      "step": 94
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014912022701586513,
+      "loss": 0.9273,
+      "step": 95
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014834152599098248,
+      "loss": 0.9071,
+      "step": 96
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014757489798561242,
+      "loss": 0.954,
+      "step": 97
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014682003422210332,
+      "loss": 0.7897,
+      "step": 98
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014607663686703578,
+      "loss": 0.9045,
+      "step": 99
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014534441853748632,
+      "loss": 0.7919,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014462310183424506,
+      "loss": 0.7449,
+      "step": 101
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0001439124189002655,
+      "loss": 0.8953,
+      "step": 102
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0001432121110027503,
+      "loss": 0.974,
+      "step": 103
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014252192813739225,
+      "loss": 0.959,
+      "step": 104
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014184162865339505,
+      "loss": 0.8767,
+      "step": 105
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014117097889799755,
+      "loss": 0.9206,
+      "step": 106
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.000140509752879313,
+      "loss": 0.8096,
+      "step": 107
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013985773194637893,
+      "loss": 0.9726,
+      "step": 108
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013921470448538878,
+      "loss": 0.7764,
+      "step": 109
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013858046563114675,
+      "loss": 0.8414,
+      "step": 110
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.0001379548169928529,
+      "loss": 0.9365,
+      "step": 111
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013733756639338393,
+      "loss": 0.9857,
+      "step": 112
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013672852762129314,
+      "loss": 0.8209,
+      "step": 113
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013612752019480102,
+      "loss": 0.9954,
+      "step": 114
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0001355343691370986,
+      "loss": 0.9425,
+      "step": 115
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.00013494890476232745,
+      "loss": 0.9199,
+      "step": 116
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0001343709624716425,
+      "loss": 1.0011,
+      "step": 117
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.00013380038255880045,
+      "loss": 0.9335,
+      "step": 118
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.000133237010024753,
+      "loss": 1.0612,
+      "step": 119
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.0001326806944007555,
+      "loss": 0.8794,
+      "step": 120
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.00013213128957953303,
+      "loss": 0.8557,
+      "step": 121
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.00013158865365407385,
+      "loss": 0.931,
+      "step": 122
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.00013105264876364566,
+      "loss": 0.9648,
+      "step": 123
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00013052314094665608,
+      "loss": 0.8448,
+      "step": 124
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00013,
+      "loss": 0.9247,
+      "step": 125
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.0001294830993445593,
+      "loss": 0.9537,
+      "step": 126
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00012897231589653857,
+      "loss": 0.8049,
+      "step": 127
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.0001284675299443404,
+      "loss": 0.8177,
+      "step": 128
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.00012796862503070062,
+      "loss": 0.9717,
+      "step": 129
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.00012747548783981962,
+      "loss": 0.8813,
+      "step": 130
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.00012698800808924157,
+      "loss": 0.9708,
+      "step": 131
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00012650607842624748,
+      "loss": 0.8776,
+      "step": 132
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.0001260295943285407,
+      "loss": 0.8564,
+      "step": 133
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00012555845400901656,
+      "loss": 0.8793,
+      "step": 134
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.0001250925583244189,
+      "loss": 0.9288,
+      "step": 135
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00012463181068769768,
+      "loss": 0.9407,
+      "step": 136
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.0001241761169838914,
+      "loss": 0.9746,
+      "step": 137
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00012372538548936814,
+      "loss": 1.0109,
+      "step": 138
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00012327952679426827,
+      "loss": 1.0695,
+      "step": 139
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012283845372799884,
+      "loss": 0.9092,
+      "step": 140
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012240208128764027,
+      "loss": 0.7535,
+      "step": 141
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012197032656913024,
+      "loss": 0.7952,
+      "step": 142
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012154310870109942,
+      "loss": 0.8747,
+      "step": 143
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.0001211203487812386,
+      "loss": 0.823,
+      "step": 144
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.0001207019698150837,
+      "loss": 0.838,
+      "step": 145
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00012028789665711085,
+      "loss": 0.8352,
+      "step": 146
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00011987805595403907,
+      "loss": 0.9483,
+      "step": 147
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00011947237609024377,
+      "loss": 0.8841,
+      "step": 148
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00011907078713518815,
+      "loss": 1.0176,
+      "step": 149
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00011867322079278597,
+      "loss": 0.9113,
+      "step": 150
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00011827961035261132,
+      "loss": 0.8683,
+      "step": 151
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.0001178898906428769,
+      "loss": 0.8158,
+      "step": 152
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.0001175039979851054,
+      "loss": 0.8146,
+      "step": 153
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.00011712187015042266,
+      "loss": 0.8513,
+      "step": 154
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.00011674344631740369,
+      "loss": 0.8071,
+      "step": 155
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.00011636866703140783,
+      "loss": 0.8923,
+      "step": 156
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011599747416534057,
+      "loss": 0.9082,
+      "step": 157
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011562981088178324,
+      "loss": 0.8323,
+      "step": 158
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011526562159643515,
+      "loss": 0.8079,
+      "step": 159
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011490485194281395,
+      "loss": 0.8623,
+      "step": 160
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.00011454744873816422,
+      "loss": 0.8465,
+      "step": 161
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.0001141933599505248,
+      "loss": 0.9027,
+      "step": 162
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.00011384253466690954,
+      "loss": 0.907,
+      "step": 163
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.00011349492306255647,
+      "loss": 0.9152,
+      "step": 164
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.0001131504763712036,
+      "loss": 0.7418,
+      "step": 165
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.00011280914685635128,
+      "loss": 0.8328,
+      "step": 166
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.0001124708877834722,
+      "loss": 0.9287,
+      "step": 167
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.00011213565339313254,
+      "loss": 0.7967,
+      "step": 168
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011180339887498949,
+      "loss": 0.84,
+      "step": 169
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011147408034263073,
+      "loss": 0.8149,
+      "step": 170
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011114765480922503,
+      "loss": 0.8555,
+      "step": 171
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011082408016395194,
+      "loss": 0.814,
+      "step": 172
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011050331514918246,
+      "loss": 0.8139,
+      "step": 173
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.0001101853193383817,
+      "loss": 0.885,
+      "step": 174
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00010987005311470715,
+      "loss": 0.7682,
+      "step": 175
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00010955747765027683,
+      "loss": 0.8266,
+      "step": 176
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00010924755488608232,
+      "loss": 0.8699,
+      "step": 177
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010894024751252352,
+      "loss": 0.957,
+      "step": 178
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010863551895054227,
+      "loss": 0.854,
+      "step": 179
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010833333333333333,
+      "loss": 0.7239,
+      "step": 180
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010803365548861171,
+      "loss": 0.7825,
+      "step": 181
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.00010773645092141682,
+      "loss": 0.8531,
+      "step": 182
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.00010744168579743401,
+      "loss": 0.7602,
+      "step": 183
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.0001071493269268154,
+      "loss": 0.8768,
+      "step": 184
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.00010685934174848223,
+      "loss": 0.8294,
+      "step": 185
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00010657169831489234,
+      "loss": 0.8872,
+      "step": 186
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.0001062863652772559,
+      "loss": 0.7016,
+      "step": 187
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00010600331187118435,
+      "loss": 0.8942,
+      "step": 188
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00010572250790275775,
+      "loss": 0.7416,
+      "step": 189
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 0.00010544392373499565,
+      "loss": 0.8104,
+      "step": 190
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 417,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 6.162417152478413e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-190/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:482256ef65b59af088490f9e3870f1b868a90992cd6294900bd43d13ef410480
+size 4536

checkpoint-200/README.md ADDED Viewed

	@@ -0,0 +1,220 @@

+---
+library_name: peft
+base_model: unsloth/llama-2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.2

checkpoint-200/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-200/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79584014796fb14dcc8275c797d29df72f7b2dc5cc4b7b8a2fbfa29fdbc02ca3
+size 159967880

checkpoint-200/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1175f1558372bc8497b475ca5b1f9f8b7d3c06b01c5ddadbf6197f16fd66e4c
+size 80630612

checkpoint-200/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203e9bfabd925cb4ec7129d24877156fcee87215187c35a867e358e56a9425a4
+size 14244

checkpoint-200/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa613265bd9842961d3a0187165a5848341c0e2d75f3b3120add96d3b9bbb5af
+size 1064

checkpoint-200/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1219 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.47961630695443647,
+  "eval_steps": 500,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00013,
+      "loss": 1.1241,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00026,
+      "loss": 1.0107,
+      "step": 2
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00039,
+      "loss": 1.1086,
+      "step": 3
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00052,
+      "loss": 1.0044,
+      "step": 4
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00065,
+      "loss": 1.0496,
+      "step": 5
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.0005933661039639299,
+      "loss": 1.0199,
+      "step": 6
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005493502655735357,
+      "loss": 1.0198,
+      "step": 7
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005138701197773616,
+      "loss": 0.969,
+      "step": 8
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004844813951249544,
+      "loss": 0.9383,
+      "step": 9
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004596194077712558,
+      "loss": 0.8776,
+      "step": 10
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004382299106011073,
+      "loss": 1.0173,
+      "step": 11
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004195731958391368,
+      "loss": 1.1173,
+      "step": 12
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004031128874149274,
+      "loss": 1.0876,
+      "step": 13
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0003884492980336779,
+      "loss": 1.0524,
+      "step": 14
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0003752776749732568,
+      "loss": 0.8953,
+      "step": 15
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00036336104634371584,
+      "loss": 1.1335,
+      "step": 16
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00035251199395531623,
+      "loss": 0.9837,
+      "step": 17
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00034258007985157445,
+      "loss": 0.9707,
+      "step": 18
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.0003334429644276751,
+      "loss": 0.9149,
+      "step": 19
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.000325,
+      "loss": 1.0043,
+      "step": 20
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00031716752370827323,
+      "loss": 1.001,
+      "step": 21
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00030987534150481746,
+      "loss": 1.0395,
+      "step": 22
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.000303064062678102,
+      "loss": 0.8718,
+      "step": 23
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029668305198196496,
+      "loss": 1.1114,
+      "step": 24
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029068883707497264,
+      "loss": 0.7765,
+      "step": 25
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0002850438562747845,
+      "loss": 0.9522,
+      "step": 26
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00027971546389275785,
+      "loss": 0.9588,
+      "step": 27
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00027467513278676785,
+      "loss": 1.0313,
+      "step": 28
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.0002698978095246549,
+      "loss": 0.9338,
+      "step": 29
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.000265361388801511,
+      "loss": 0.892,
+      "step": 30
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00026104628189331215,
+      "loss": 0.893,
+      "step": 31
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0002569350598886808,
+      "loss": 0.8983,
+      "step": 32
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00025301215685249496,
+      "loss": 0.9277,
+      "step": 33
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024926362137539537,
+      "loss": 0.8962,
+      "step": 34
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024567690745599767,
+      "loss": 0.9124,
+      "step": 35
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002422406975624772,
+      "loss": 0.9535,
+      "step": 36
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023894475218048754,
+      "loss": 0.9019,
+      "step": 37
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002357797812857538,
+      "loss": 1.024,
+      "step": 38
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023273733406281566,
+      "loss": 0.8549,
+      "step": 39
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.0002298097038856279,
+      "loss": 1.0489,
+      "step": 40
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022698984612511293,
+      "loss": 0.751,
+      "step": 41
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022427130678626507,
+      "loss": 0.834,
+      "step": 42
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022164816032790388,
+      "loss": 0.889,
+      "step": 43
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021911495530055366,
+      "loss": 1.0103,
+      "step": 44
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021666666666666666,
+      "loss": 0.8766,
+      "step": 45
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002142986538536308,
+      "loss": 0.8181,
+      "step": 46
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002120066237423687,
+      "loss": 0.8754,
+      "step": 47
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002097865979195684,
+      "loss": 0.9038,
+      "step": 48
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020763488362498048,
+      "loss": 0.8646,
+      "step": 49
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020554804791094464,
+      "loss": 0.8836,
+      "step": 50
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002035228946026736,
+      "loss": 0.9962,
+      "step": 51
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002015564437074637,
+      "loss": 0.8835,
+      "step": 52
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019964591297103414,
+      "loss": 0.9196,
+      "step": 53
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019778870132130996,
+      "loss": 0.8995,
+      "step": 54
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019598237397554634,
+      "loss": 1.0178,
+      "step": 55
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019422464901683895,
+      "loss": 0.9395,
+      "step": 56
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019251338527170498,
+      "loss": 0.9882,
+      "step": 57
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019084657134227863,
+      "loss": 0.9274,
+      "step": 58
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00018922231566536414,
+      "loss": 0.9517,
+      "step": 59
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0001876388374866284,
+      "loss": 0.865,
+      "step": 60
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018609445865200715,
+      "loss": 0.9314,
+      "step": 61
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018458759613029606,
+      "loss": 0.9224,
+      "step": 62
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018311675519117857,
+      "loss": 0.788,
+      "step": 63
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018168052317185792,
+      "loss": 0.9739,
+      "step": 64
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00018027756377319947,
+      "loss": 0.9419,
+      "step": 65
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.0001789066118330336,
+      "loss": 0.8772,
+      "step": 66
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017756646853014972,
+      "loss": 0.8707,
+      "step": 67
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017625599697765812,
+      "loss": 0.8089,
+      "step": 68
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017497411816890378,
+      "loss": 0.9303,
+      "step": 69
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017371980724307585,
+      "loss": 0.9161,
+      "step": 70
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017249209004113945,
+      "loss": 0.9064,
+      "step": 71
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017129003992578723,
+      "loss": 1.0988,
+      "step": 72
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00017011277484181944,
+      "loss": 0.9804,
+      "step": 73
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0001689594545957618,
+      "loss": 0.8382,
+      "step": 74
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016782927833565472,
+      "loss": 0.9632,
+      "step": 75
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016672148221383754,
+      "loss": 0.9494,
+      "step": 76
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016563533721722828,
+      "loss": 0.9253,
+      "step": 77
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001645701471510958,
+      "loss": 0.9143,
+      "step": 78
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016352524676365398,
+      "loss": 0.8907,
+      "step": 79
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001625,
+      "loss": 0.9748,
+      "step": 80
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016149379837498482,
+      "loss": 0.893,
+      "step": 81
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00016050605945555833,
+      "loss": 0.839,
+      "step": 82
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.0001595362254439902,
+      "loss": 0.9276,
+      "step": 83
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015858376185413662,
+      "loss": 0.8758,
+      "step": 84
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015764815627361642,
+      "loss": 0.9125,
+      "step": 85
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015672891720538393,
+      "loss": 0.955,
+      "step": 86
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015582557298274985,
+      "loss": 0.9104,
+      "step": 87
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015493767075240873,
+      "loss": 0.8861,
+      "step": 88
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0001540647755204926,
+      "loss": 0.9693,
+      "step": 89
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0001532064692570853,
+      "loss": 0.7245,
+      "step": 90
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.000152362350055011,
+      "loss": 0.7523,
+      "step": 91
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.000151532031339051,
+      "loss": 0.8522,
+      "step": 92
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.00015071514112205468,
+      "loss": 0.9273,
+      "step": 93
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0001499113213046938,
+      "loss": 1.0303,
+      "step": 94
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014912022701586513,
+      "loss": 0.9273,
+      "step": 95
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014834152599098248,
+      "loss": 0.9071,
+      "step": 96
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.00014757489798561242,
+      "loss": 0.954,
+      "step": 97
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014682003422210332,
+      "loss": 0.7897,
+      "step": 98
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014607663686703578,
+      "loss": 0.9045,
+      "step": 99
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014534441853748632,
+      "loss": 0.7919,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00014462310183424506,
+      "loss": 0.7449,
+      "step": 101
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0001439124189002655,
+      "loss": 0.8953,
+      "step": 102
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0001432121110027503,
+      "loss": 0.974,
+      "step": 103
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014252192813739225,
+      "loss": 0.959,
+      "step": 104
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014184162865339505,
+      "loss": 0.8767,
+      "step": 105
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.00014117097889799755,
+      "loss": 0.9206,
+      "step": 106
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.000140509752879313,
+      "loss": 0.8096,
+      "step": 107
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013985773194637893,
+      "loss": 0.9726,
+      "step": 108
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013921470448538878,
+      "loss": 0.7764,
+      "step": 109
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00013858046563114675,
+      "loss": 0.8414,
+      "step": 110
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.0001379548169928529,
+      "loss": 0.9365,
+      "step": 111
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013733756639338393,
+      "loss": 0.9857,
+      "step": 112
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013672852762129314,
+      "loss": 0.8209,
+      "step": 113
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00013612752019480102,
+      "loss": 0.9954,
+      "step": 114
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0001355343691370986,
+      "loss": 0.9425,
+      "step": 115
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.00013494890476232745,
+      "loss": 0.9199,
+      "step": 116
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0001343709624716425,
+      "loss": 1.0011,
+      "step": 117
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.00013380038255880045,
+      "loss": 0.9335,
+      "step": 118
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.000133237010024753,
+      "loss": 1.0612,
+      "step": 119
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.0001326806944007555,
+      "loss": 0.8794,
+      "step": 120
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.00013213128957953303,
+      "loss": 0.8557,
+      "step": 121
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.00013158865365407385,
+      "loss": 0.931,
+      "step": 122
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.00013105264876364566,
+      "loss": 0.9648,
+      "step": 123
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00013052314094665608,
+      "loss": 0.8448,
+      "step": 124
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00013,
+      "loss": 0.9247,
+      "step": 125
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.0001294830993445593,
+      "loss": 0.9537,
+      "step": 126
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00012897231589653857,
+      "loss": 0.8049,
+      "step": 127
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.0001284675299443404,
+      "loss": 0.8177,
+      "step": 128
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.00012796862503070062,
+      "loss": 0.9717,
+      "step": 129
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.00012747548783981962,
+      "loss": 0.8813,
+      "step": 130
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 0.00012698800808924157,
+      "loss": 0.9708,
+      "step": 131
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00012650607842624748,
+      "loss": 0.8776,
+      "step": 132
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.0001260295943285407,
+      "loss": 0.8564,
+      "step": 133
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00012555845400901656,
+      "loss": 0.8793,
+      "step": 134
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.0001250925583244189,
+      "loss": 0.9288,
+      "step": 135
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00012463181068769768,
+      "loss": 0.9407,
+      "step": 136
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.0001241761169838914,
+      "loss": 0.9746,
+      "step": 137
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00012372538548936814,
+      "loss": 1.0109,
+      "step": 138
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00012327952679426827,
+      "loss": 1.0695,
+      "step": 139
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012283845372799884,
+      "loss": 0.9092,
+      "step": 140
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012240208128764027,
+      "loss": 0.7535,
+      "step": 141
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012197032656913024,
+      "loss": 0.7952,
+      "step": 142
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00012154310870109942,
+      "loss": 0.8747,
+      "step": 143
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.0001211203487812386,
+      "loss": 0.823,
+      "step": 144
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.0001207019698150837,
+      "loss": 0.838,
+      "step": 145
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00012028789665711085,
+      "loss": 0.8352,
+      "step": 146
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00011987805595403907,
+      "loss": 0.9483,
+      "step": 147
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00011947237609024377,
+      "loss": 0.8841,
+      "step": 148
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00011907078713518815,
+      "loss": 1.0176,
+      "step": 149
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00011867322079278597,
+      "loss": 0.9113,
+      "step": 150
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00011827961035261132,
+      "loss": 0.8683,
+      "step": 151
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.0001178898906428769,
+      "loss": 0.8158,
+      "step": 152
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.0001175039979851054,
+      "loss": 0.8146,
+      "step": 153
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.00011712187015042266,
+      "loss": 0.8513,
+      "step": 154
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.00011674344631740369,
+      "loss": 0.8071,
+      "step": 155
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 0.00011636866703140783,
+      "loss": 0.8923,
+      "step": 156
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011599747416534057,
+      "loss": 0.9082,
+      "step": 157
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011562981088178324,
+      "loss": 0.8323,
+      "step": 158
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011526562159643515,
+      "loss": 0.8079,
+      "step": 159
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00011490485194281395,
+      "loss": 0.8623,
+      "step": 160
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.00011454744873816422,
+      "loss": 0.8465,
+      "step": 161
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.0001141933599505248,
+      "loss": 0.9027,
+      "step": 162
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.00011384253466690954,
+      "loss": 0.907,
+      "step": 163
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.00011349492306255647,
+      "loss": 0.9152,
+      "step": 164
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.0001131504763712036,
+      "loss": 0.7418,
+      "step": 165
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.00011280914685635128,
+      "loss": 0.8328,
+      "step": 166
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.0001124708877834722,
+      "loss": 0.9287,
+      "step": 167
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.00011213565339313254,
+      "loss": 0.7967,
+      "step": 168
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011180339887498949,
+      "loss": 0.84,
+      "step": 169
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011147408034263073,
+      "loss": 0.8149,
+      "step": 170
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011114765480922503,
+      "loss": 0.8555,
+      "step": 171
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011082408016395194,
+      "loss": 0.814,
+      "step": 172
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00011050331514918246,
+      "loss": 0.8139,
+      "step": 173
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.0001101853193383817,
+      "loss": 0.885,
+      "step": 174
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00010987005311470715,
+      "loss": 0.7682,
+      "step": 175
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00010955747765027683,
+      "loss": 0.8266,
+      "step": 176
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00010924755488608232,
+      "loss": 0.8699,
+      "step": 177
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010894024751252352,
+      "loss": 0.957,
+      "step": 178
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010863551895054227,
+      "loss": 0.854,
+      "step": 179
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010833333333333333,
+      "loss": 0.7239,
+      "step": 180
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.00010803365548861171,
+      "loss": 0.7825,
+      "step": 181
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.00010773645092141682,
+      "loss": 0.8531,
+      "step": 182
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.00010744168579743401,
+      "loss": 0.7602,
+      "step": 183
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.0001071493269268154,
+      "loss": 0.8768,
+      "step": 184
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.00010685934174848223,
+      "loss": 0.8294,
+      "step": 185
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00010657169831489234,
+      "loss": 0.8872,
+      "step": 186
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.0001062863652772559,
+      "loss": 0.7016,
+      "step": 187
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00010600331187118435,
+      "loss": 0.8942,
+      "step": 188
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00010572250790275775,
+      "loss": 0.7416,
+      "step": 189
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 0.00010544392373499565,
+      "loss": 0.8104,
+      "step": 190
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 0.0001051675302747182,
+      "loss": 0.8349,
+      "step": 191
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 0.0001048932989597842,
+      "loss": 0.9013,
+      "step": 192
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 0.00010462120174669319,
+      "loss": 0.7646,
+      "step": 193
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00010435121109853953,
+      "loss": 0.8087,
+      "step": 194
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00010408329997330662,
+      "loss": 0.9798,
+      "step": 195
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00010381744181249024,
+      "loss": 0.7266,
+      "step": 196
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.0001035536105300395,
+      "loss": 0.8502,
+      "step": 197
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00010329178050160582,
+      "loss": 0.7797,
+      "step": 198
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 0.00010303192655408924,
+      "loss": 0.7328,
+      "step": 199
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 0.00010277402395547232,
+      "loss": 0.7916,
+      "step": 200
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 417,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 6.487869967879373e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:482256ef65b59af088490f9e3870f1b868a90992cd6294900bd43d13ef410480
+size 4536

checkpoint-40/README.md ADDED Viewed

	@@ -0,0 +1,220 @@

+---
+library_name: peft
+base_model: unsloth/llama-2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.2

checkpoint-40/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-40/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d879b575ccbc5870446f4660d9f84e87e57d8943adb6905d66d7b2416a58cd2
+size 159967880

checkpoint-40/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abf198c2832597a6e8d36b6dbdbd6ba07ddd67e809dbe44b9c8cde5a9d16b77f
+size 80630612

checkpoint-40/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203e9bfabd925cb4ec7129d24877156fcee87215187c35a867e358e56a9425a4
+size 14244

checkpoint-40/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1a123c2e2869cfd4ae8b10396584d8f5b9752e5427db6f1a6efa11fcc47bfcd
+size 1064

checkpoint-40/trainer_state.json ADDED Viewed

	@@ -0,0 +1,259 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.09592326139088729,
+  "eval_steps": 500,
+  "global_step": 40,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00013,
+      "loss": 1.1241,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00026,
+      "loss": 1.0107,
+      "step": 2
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00039,
+      "loss": 1.1086,
+      "step": 3
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00052,
+      "loss": 1.0044,
+      "step": 4
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00065,
+      "loss": 1.0496,
+      "step": 5
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.0005933661039639299,
+      "loss": 1.0199,
+      "step": 6
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005493502655735357,
+      "loss": 1.0198,
+      "step": 7
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005138701197773616,
+      "loss": 0.969,
+      "step": 8
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004844813951249544,
+      "loss": 0.9383,
+      "step": 9
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004596194077712558,
+      "loss": 0.8776,
+      "step": 10
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004382299106011073,
+      "loss": 1.0173,
+      "step": 11
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004195731958391368,
+      "loss": 1.1173,
+      "step": 12
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004031128874149274,
+      "loss": 1.0876,
+      "step": 13
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0003884492980336779,
+      "loss": 1.0524,
+      "step": 14
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0003752776749732568,
+      "loss": 0.8953,
+      "step": 15
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00036336104634371584,
+      "loss": 1.1335,
+      "step": 16
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00035251199395531623,
+      "loss": 0.9837,
+      "step": 17
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00034258007985157445,
+      "loss": 0.9707,
+      "step": 18
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.0003334429644276751,
+      "loss": 0.9149,
+      "step": 19
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.000325,
+      "loss": 1.0043,
+      "step": 20
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00031716752370827323,
+      "loss": 1.001,
+      "step": 21
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00030987534150481746,
+      "loss": 1.0395,
+      "step": 22
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.000303064062678102,
+      "loss": 0.8718,
+      "step": 23
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029668305198196496,
+      "loss": 1.1114,
+      "step": 24
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029068883707497264,
+      "loss": 0.7765,
+      "step": 25
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0002850438562747845,
+      "loss": 0.9522,
+      "step": 26
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00027971546389275785,
+      "loss": 0.9588,
+      "step": 27
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00027467513278676785,
+      "loss": 1.0313,
+      "step": 28
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.0002698978095246549,
+      "loss": 0.9338,
+      "step": 29
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.000265361388801511,
+      "loss": 0.892,
+      "step": 30
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00026104628189331215,
+      "loss": 0.893,
+      "step": 31
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0002569350598886808,
+      "loss": 0.8983,
+      "step": 32
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00025301215685249496,
+      "loss": 0.9277,
+      "step": 33
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024926362137539537,
+      "loss": 0.8962,
+      "step": 34
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024567690745599767,
+      "loss": 0.9124,
+      "step": 35
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002422406975624772,
+      "loss": 0.9535,
+      "step": 36
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023894475218048754,
+      "loss": 0.9019,
+      "step": 37
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002357797812857538,
+      "loss": 1.024,
+      "step": 38
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023273733406281566,
+      "loss": 0.8549,
+      "step": 39
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.0002298097038856279,
+      "loss": 1.0489,
+      "step": 40
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 417,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 1.2924943770845184e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-40/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:482256ef65b59af088490f9e3870f1b868a90992cd6294900bd43d13ef410480
+size 4536

checkpoint-80/README.md ADDED Viewed

	@@ -0,0 +1,220 @@

+---
+library_name: peft
+base_model: unsloth/llama-2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.2

checkpoint-80/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-80/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9f21852fc41174f2b5b87cccc6d4a5d07a4660480a9d9fdd6ebc875883cf30d
+size 159967880

checkpoint-80/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53f79c6b8e0d8ad36a77715fff5512b2deddade328c167b641b9dfae8933876b
+size 80630612

checkpoint-80/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203e9bfabd925cb4ec7129d24877156fcee87215187c35a867e358e56a9425a4
+size 14244

checkpoint-80/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88790c0dfb035758d1c04e3296c3d510b282a3a11865d663fde0aa5d8dc72699
+size 1064

checkpoint-80/trainer_state.json ADDED Viewed

	@@ -0,0 +1,499 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.19184652278177458,
+  "eval_steps": 500,
+  "global_step": 80,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00013,
+      "loss": 1.1241,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00026,
+      "loss": 1.0107,
+      "step": 2
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00039,
+      "loss": 1.1086,
+      "step": 3
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00052,
+      "loss": 1.0044,
+      "step": 4
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00065,
+      "loss": 1.0496,
+      "step": 5
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.0005933661039639299,
+      "loss": 1.0199,
+      "step": 6
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005493502655735357,
+      "loss": 1.0198,
+      "step": 7
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005138701197773616,
+      "loss": 0.969,
+      "step": 8
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004844813951249544,
+      "loss": 0.9383,
+      "step": 9
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004596194077712558,
+      "loss": 0.8776,
+      "step": 10
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004382299106011073,
+      "loss": 1.0173,
+      "step": 11
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004195731958391368,
+      "loss": 1.1173,
+      "step": 12
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004031128874149274,
+      "loss": 1.0876,
+      "step": 13
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0003884492980336779,
+      "loss": 1.0524,
+      "step": 14
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0003752776749732568,
+      "loss": 0.8953,
+      "step": 15
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00036336104634371584,
+      "loss": 1.1335,
+      "step": 16
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00035251199395531623,
+      "loss": 0.9837,
+      "step": 17
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00034258007985157445,
+      "loss": 0.9707,
+      "step": 18
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.0003334429644276751,
+      "loss": 0.9149,
+      "step": 19
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.000325,
+      "loss": 1.0043,
+      "step": 20
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00031716752370827323,
+      "loss": 1.001,
+      "step": 21
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00030987534150481746,
+      "loss": 1.0395,
+      "step": 22
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.000303064062678102,
+      "loss": 0.8718,
+      "step": 23
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029668305198196496,
+      "loss": 1.1114,
+      "step": 24
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029068883707497264,
+      "loss": 0.7765,
+      "step": 25
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0002850438562747845,
+      "loss": 0.9522,
+      "step": 26
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00027971546389275785,
+      "loss": 0.9588,
+      "step": 27
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00027467513278676785,
+      "loss": 1.0313,
+      "step": 28
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.0002698978095246549,
+      "loss": 0.9338,
+      "step": 29
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.000265361388801511,
+      "loss": 0.892,
+      "step": 30
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00026104628189331215,
+      "loss": 0.893,
+      "step": 31
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0002569350598886808,
+      "loss": 0.8983,
+      "step": 32
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00025301215685249496,
+      "loss": 0.9277,
+      "step": 33
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024926362137539537,
+      "loss": 0.8962,
+      "step": 34
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024567690745599767,
+      "loss": 0.9124,
+      "step": 35
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002422406975624772,
+      "loss": 0.9535,
+      "step": 36
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023894475218048754,
+      "loss": 0.9019,
+      "step": 37
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002357797812857538,
+      "loss": 1.024,
+      "step": 38
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023273733406281566,
+      "loss": 0.8549,
+      "step": 39
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.0002298097038856279,
+      "loss": 1.0489,
+      "step": 40
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022698984612511293,
+      "loss": 0.751,
+      "step": 41
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022427130678626507,
+      "loss": 0.834,
+      "step": 42
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022164816032790388,
+      "loss": 0.889,
+      "step": 43
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021911495530055366,
+      "loss": 1.0103,
+      "step": 44
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021666666666666666,
+      "loss": 0.8766,
+      "step": 45
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002142986538536308,
+      "loss": 0.8181,
+      "step": 46
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002120066237423687,
+      "loss": 0.8754,
+      "step": 47
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002097865979195684,
+      "loss": 0.9038,
+      "step": 48
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020763488362498048,
+      "loss": 0.8646,
+      "step": 49
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020554804791094464,
+      "loss": 0.8836,
+      "step": 50
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002035228946026736,
+      "loss": 0.9962,
+      "step": 51
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002015564437074637,
+      "loss": 0.8835,
+      "step": 52
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019964591297103414,
+      "loss": 0.9196,
+      "step": 53
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019778870132130996,
+      "loss": 0.8995,
+      "step": 54
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019598237397554634,
+      "loss": 1.0178,
+      "step": 55
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019422464901683895,
+      "loss": 0.9395,
+      "step": 56
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019251338527170498,
+      "loss": 0.9882,
+      "step": 57
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019084657134227863,
+      "loss": 0.9274,
+      "step": 58
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00018922231566536414,
+      "loss": 0.9517,
+      "step": 59
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0001876388374866284,
+      "loss": 0.865,
+      "step": 60
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018609445865200715,
+      "loss": 0.9314,
+      "step": 61
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018458759613029606,
+      "loss": 0.9224,
+      "step": 62
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018311675519117857,
+      "loss": 0.788,
+      "step": 63
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018168052317185792,
+      "loss": 0.9739,
+      "step": 64
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00018027756377319947,
+      "loss": 0.9419,
+      "step": 65
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.0001789066118330336,
+      "loss": 0.8772,
+      "step": 66
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017756646853014972,
+      "loss": 0.8707,
+      "step": 67
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017625599697765812,
+      "loss": 0.8089,
+      "step": 68
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017497411816890378,
+      "loss": 0.9303,
+      "step": 69
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017371980724307585,
+      "loss": 0.9161,
+      "step": 70
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017249209004113945,
+      "loss": 0.9064,
+      "step": 71
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017129003992578723,
+      "loss": 1.0988,
+      "step": 72
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00017011277484181944,
+      "loss": 0.9804,
+      "step": 73
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0001689594545957618,
+      "loss": 0.8382,
+      "step": 74
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016782927833565472,
+      "loss": 0.9632,
+      "step": 75
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016672148221383754,
+      "loss": 0.9494,
+      "step": 76
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016563533721722828,
+      "loss": 0.9253,
+      "step": 77
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001645701471510958,
+      "loss": 0.9143,
+      "step": 78
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016352524676365398,
+      "loss": 0.8907,
+      "step": 79
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001625,
+      "loss": 0.9748,
+      "step": 80
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 417,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 2.5917530675871744e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-80/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:482256ef65b59af088490f9e3870f1b868a90992cd6294900bd43d13ef410480
+size 4536

checkpoint-90/README.md ADDED Viewed

	@@ -0,0 +1,220 @@

+---
+library_name: peft
+base_model: unsloth/llama-2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.2

checkpoint-90/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-90/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:351c23da56b4cc54ecd06069484fb435da7dbe8d494862dc600081ac95939f3e
+size 159967880

checkpoint-90/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6fe15fe30435e02f95eae519a4d54f175c25c64f13b10997c9d389880aa095f
+size 80630612

checkpoint-90/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203e9bfabd925cb4ec7129d24877156fcee87215187c35a867e358e56a9425a4
+size 14244

checkpoint-90/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:422a39896586b55e3cf9fa259c2974dc0869f487be4c61f453a4bc7adf508043
+size 1064

checkpoint-90/trainer_state.json ADDED Viewed

	@@ -0,0 +1,559 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.2158273381294964,
+  "eval_steps": 500,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00013,
+      "loss": 1.1241,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.00026,
+      "loss": 1.0107,
+      "step": 2
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00039,
+      "loss": 1.1086,
+      "step": 3
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00052,
+      "loss": 1.0044,
+      "step": 4
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.00065,
+      "loss": 1.0496,
+      "step": 5
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.0005933661039639299,
+      "loss": 1.0199,
+      "step": 6
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005493502655735357,
+      "loss": 1.0198,
+      "step": 7
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0005138701197773616,
+      "loss": 0.969,
+      "step": 8
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004844813951249544,
+      "loss": 0.9383,
+      "step": 9
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.0004596194077712558,
+      "loss": 0.8776,
+      "step": 10
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004382299106011073,
+      "loss": 1.0173,
+      "step": 11
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004195731958391368,
+      "loss": 1.1173,
+      "step": 12
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0004031128874149274,
+      "loss": 1.0876,
+      "step": 13
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0003884492980336779,
+      "loss": 1.0524,
+      "step": 14
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0003752776749732568,
+      "loss": 0.8953,
+      "step": 15
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00036336104634371584,
+      "loss": 1.1335,
+      "step": 16
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00035251199395531623,
+      "loss": 0.9837,
+      "step": 17
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00034258007985157445,
+      "loss": 0.9707,
+      "step": 18
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.0003334429644276751,
+      "loss": 0.9149,
+      "step": 19
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.000325,
+      "loss": 1.0043,
+      "step": 20
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00031716752370827323,
+      "loss": 1.001,
+      "step": 21
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00030987534150481746,
+      "loss": 1.0395,
+      "step": 22
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.000303064062678102,
+      "loss": 0.8718,
+      "step": 23
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029668305198196496,
+      "loss": 1.1114,
+      "step": 24
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00029068883707497264,
+      "loss": 0.7765,
+      "step": 25
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0002850438562747845,
+      "loss": 0.9522,
+      "step": 26
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00027971546389275785,
+      "loss": 0.9588,
+      "step": 27
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00027467513278676785,
+      "loss": 1.0313,
+      "step": 28
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.0002698978095246549,
+      "loss": 0.9338,
+      "step": 29
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.000265361388801511,
+      "loss": 0.892,
+      "step": 30
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00026104628189331215,
+      "loss": 0.893,
+      "step": 31
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0002569350598886808,
+      "loss": 0.8983,
+      "step": 32
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00025301215685249496,
+      "loss": 0.9277,
+      "step": 33
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024926362137539537,
+      "loss": 0.8962,
+      "step": 34
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00024567690745599767,
+      "loss": 0.9124,
+      "step": 35
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002422406975624772,
+      "loss": 0.9535,
+      "step": 36
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023894475218048754,
+      "loss": 0.9019,
+      "step": 37
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0002357797812857538,
+      "loss": 1.024,
+      "step": 38
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00023273733406281566,
+      "loss": 0.8549,
+      "step": 39
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.0002298097038856279,
+      "loss": 1.0489,
+      "step": 40
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022698984612511293,
+      "loss": 0.751,
+      "step": 41
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022427130678626507,
+      "loss": 0.834,
+      "step": 42
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.00022164816032790388,
+      "loss": 0.889,
+      "step": 43
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021911495530055366,
+      "loss": 1.0103,
+      "step": 44
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00021666666666666666,
+      "loss": 0.8766,
+      "step": 45
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002142986538536308,
+      "loss": 0.8181,
+      "step": 46
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002120066237423687,
+      "loss": 0.8754,
+      "step": 47
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002097865979195684,
+      "loss": 0.9038,
+      "step": 48
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020763488362498048,
+      "loss": 0.8646,
+      "step": 49
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.00020554804791094464,
+      "loss": 0.8836,
+      "step": 50
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002035228946026736,
+      "loss": 0.9962,
+      "step": 51
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002015564437074637,
+      "loss": 0.8835,
+      "step": 52
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019964591297103414,
+      "loss": 0.9196,
+      "step": 53
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019778870132130996,
+      "loss": 0.8995,
+      "step": 54
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019598237397554634,
+      "loss": 1.0178,
+      "step": 55
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.00019422464901683895,
+      "loss": 0.9395,
+      "step": 56
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019251338527170498,
+      "loss": 0.9882,
+      "step": 57
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00019084657134227863,
+      "loss": 0.9274,
+      "step": 58
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.00018922231566536414,
+      "loss": 0.9517,
+      "step": 59
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0001876388374866284,
+      "loss": 0.865,
+      "step": 60
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018609445865200715,
+      "loss": 0.9314,
+      "step": 61
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018458759613029606,
+      "loss": 0.9224,
+      "step": 62
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018311675519117857,
+      "loss": 0.788,
+      "step": 63
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00018168052317185792,
+      "loss": 0.9739,
+      "step": 64
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00018027756377319947,
+      "loss": 0.9419,
+      "step": 65
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.0001789066118330336,
+      "loss": 0.8772,
+      "step": 66
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017756646853014972,
+      "loss": 0.8707,
+      "step": 67
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00017625599697765812,
+      "loss": 0.8089,
+      "step": 68
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017497411816890378,
+      "loss": 0.9303,
+      "step": 69
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017371980724307585,
+      "loss": 0.9161,
+      "step": 70
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017249209004113945,
+      "loss": 0.9064,
+      "step": 71
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00017129003992578723,
+      "loss": 1.0988,
+      "step": 72
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00017011277484181944,
+      "loss": 0.9804,
+      "step": 73
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0001689594545957618,
+      "loss": 0.8382,
+      "step": 74
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016782927833565472,
+      "loss": 0.9632,
+      "step": 75
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016672148221383754,
+      "loss": 0.9494,
+      "step": 76
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00016563533721722828,
+      "loss": 0.9253,
+      "step": 77
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001645701471510958,
+      "loss": 0.9143,
+      "step": 78
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016352524676365398,
+      "loss": 0.8907,
+      "step": 79
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0001625,
+      "loss": 0.9748,
+      "step": 80
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00016149379837498482,
+      "loss": 0.893,
+      "step": 81
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00016050605945555833,
+      "loss": 0.839,
+      "step": 82
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.0001595362254439902,
+      "loss": 0.9276,
+      "step": 83
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015858376185413662,
+      "loss": 0.8758,
+      "step": 84
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00015764815627361642,
+      "loss": 0.9125,
+      "step": 85
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015672891720538393,
+      "loss": 0.955,
+      "step": 86
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015582557298274985,
+      "loss": 0.9104,
+      "step": 87
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00015493767075240873,
+      "loss": 0.8861,
+      "step": 88
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0001540647755204926,
+      "loss": 0.9693,
+      "step": 89
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0001532064692570853,
+      "loss": 0.7245,
+      "step": 90
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 417,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 2.9172058829881344e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-90/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:482256ef65b59af088490f9e3870f1b868a90992cd6294900bd43d13ef410480
+size 4536