wish6424/Mixtral-8x7B-prostate-sum-test
- README.md +8 -11
- adapter_config.json +6 -6
- adapter_model.safetensors +2 -2
- training_args.bin +1 -1
README.md
CHANGED
@@ -20,7 +20,12 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
-
+- eval_loss: 0.9034
+- eval_runtime: 1.0713
+- eval_samples_per_second: 0.933
+- eval_steps_per_second: 0.933
+- epoch: 41.67
+- step: 250
 
 ## Model description
 
@@ -40,21 +45,13 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 2.5e-05
-- train_batch_size:
+- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 0.03
-- training_steps:
+- training_steps: 1000
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 1.2988        | 0.83  | 10   | 1.4094          |
-| 1.3009        | 1.67  | 20   | 1.3804          |
-
 
 ### Framework versions
 
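The updated hyperparameter list maps almost one-to-one onto `transformers.TrainingArguments`. A minimal sketch under that assumption; the training script is not part of this commit, so `output_dir` and the optimizer variant are placeholders, and the README's `lr_scheduler_warmup_steps: 0.03` is read here as a warmup ratio, since step counts must be integers:

```python
from transformers import TrainingArguments

# Sketch reconstructed from the README's hyperparameter list; values not
# listed there (output_dir, optim) are placeholders, not the repo's settings.
training_args = TrainingArguments(
    output_dir="Mixtral-8x7B-prostate-sum-test",  # placeholder
    learning_rate=2.5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    seed=42,
    optim="adamw_torch",        # Adam with the betas/epsilon below
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    warmup_ratio=0.03,          # README's "lr_scheduler_warmup_steps: 0.03"
    max_steps=1000,             # README's "training_steps: 1000"
)
```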
adapter_config.json
CHANGED
@@ -19,14 +19,14 @@
     "rank_pattern": {},
     "revision": null,
     "target_modules": [
-        "lm_head",
         "up_proj",
-        "o_proj",
-        "down_proj",
-        "gate_proj",
         "v_proj",
-        "q_proj",
-        "k_proj"
+        "gate_proj",
+        "lm_head",
+        "q_proj",
+        "down_proj",
+        "o_proj",
+        "k_proj"
     ],
     "task_type": "CAUSAL_LM",
     "use_rslora": false
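The reordered `target_modules` list is what one would pass to `peft.LoraConfig` when attaching the adapter. A sketch under that assumption; rank, alpha, and dropout do not appear in the shown hunk, so those values are placeholders:

```python
from peft import LoraConfig

# Placeholder r/lora_alpha/lora_dropout: the real values live elsewhere in
# adapter_config.json and are not visible in this hunk.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
        "gate_proj", "up_proj", "down_proj",      # expert MLP projections
        "lm_head",
    ],
    task_type="CAUSAL_LM",
)
```

Recent `peft` versions normalize `target_modules` to a set, so the order serialized into `adapter_config.json` can change between saves even when the module set does not; that would account for a pure reordering like the one in this diff.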
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+size 48
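`adapter_model.safetensors` stores only the LoRA weights, so using them requires loading the base model first. A sketch of pulling this adapter from the Hub on top of Mixtral, assuming hardware that can hold the base weights:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Load the frozen base model, then attach the LoRA adapter from this repo.
base = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "wish6424/Mixtral-8x7B-prostate-sum-test")
tokenizer = AutoTokenizer.from_pretrained(BASE)
```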
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ab72c733dc6e47c7b384d00f452f5d4378116cc3fb8c04b2a7a452a0404da25a
 size 4728
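`training_args.bin` is the pickled `TrainingArguments` object that `Trainer` writes next to its outputs, so the full run configuration can be inspected directly. A sketch; note that unpickling executes code, so only do this for files you trust:

```python
import torch

# training_args.bin is a pickle, not a tensor file, hence weights_only=False.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.max_steps, args.lr_scheduler_type)
```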