operons/mistral_7x8b

Files changed (7) hide show

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: mistralai/Mistral-7B-Instruct-v0.2
 datasets:
 - generator
 library_name: peft
@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # mistral_instruct_generation2
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1644
 ## Model description
@@ -40,7 +40,7 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 16
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -52,12 +52,12 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.2038        | 1.0   | 61   | 0.2067          |
-| 0.1683        | 2.0   | 122  | 0.1759          |
-| 0.1612        | 3.0   | 183  | 0.1706          |
-| 0.158         | 4.0   | 244  | 0.1669          |
-| 0.1565        | 5.0   | 305  | 0.1660          |
-| 0.156         | 6.0   | 366  | 0.1644          |
 ### Framework versions

 ---
+base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
 datasets:
 - generator
 library_name: peft
 # mistral_instruct_generation2
+This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1836
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.1924        | 1.0   | 373  | 0.2022          |
+| 0.182         | 2.0   | 746  | 0.1896          |
+| 0.1792        | 3.0   | 1119 | 0.1864          |
+| 0.177         | 4.0   | 1492 | 0.1850          |
+| 0.174         | 5.0   | 1865 | 0.1842          |
+| 0.1723        | 6.0   | 2238 | 0.1836          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -20,8 +20,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28872b811e1232a31edfa433f32ca907b507958cc861da28d98f5b3708dfcdaa
 size 109069176

 version https://git-lfs.github.com/spec/v1
+oid sha256:b09616b9b950d77576c2f681e0e1c1c5188954c34c6819d5f71cebd32f58eac1
 size 109069176

runs/Jul22_13-31-35_7fb9740056ca/events.out.tfevents.1721655106.7fb9740056ca.1301.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:260df9193956b31212a84509dfde6d52efbbc404806e30ae84579b7c662d631f
+size 4184

runs/Jul22_13-32-32_7fb9740056ca/events.out.tfevents.1721655162.7fb9740056ca.1301.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:306544f26e9684f8326d2bbb552bdbadd7ab235e09ee262932feaa0bc112753e
+size 4184

runs/Jul22_13-41-24_7fb9740056ca/events.out.tfevents.1721655700.7fb9740056ca.6628.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ca80cb6d044942810295a2e198dc1073a6749907d7111a172722a38715af783
+size 54777

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9f33dbad024860b5bfc83c54564a9d1891fc7679a59a2b209595ece712cd8c7
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:07cb9d177fb2cf00b868540442deb33510288c86e9bbc8be01d6c76645740563
 size 5368