bif02 committed on
Commit
9aa49c1
1 Parent(s): a21f879

operons/mistral_7x8b

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: mistralai/Mistral-7B-Instruct-v0.2
3
  datasets:
4
  - generator
5
  library_name: peft
@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # mistral_instruct_generation2
20
 
21
- This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.1644
24
 
25
  ## Model description
26
 
@@ -40,7 +40,7 @@ More information needed
40
 
41
  The following hyperparameters were used during training:
42
  - learning_rate: 2e-05
43
- - train_batch_size: 16
44
  - eval_batch_size: 8
45
  - seed: 42
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -52,12 +52,12 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
- | 0.2038 | 1.0 | 61 | 0.2067 |
56
- | 0.1683 | 2.0 | 122 | 0.1759 |
57
- | 0.1612 | 3.0 | 183 | 0.1706 |
58
- | 0.158 | 4.0 | 244 | 0.1669 |
59
- | 0.1565 | 5.0 | 305 | 0.1660 |
60
- | 0.156 | 6.0 | 366 | 0.1644 |
61
 
62
 
63
  ### Framework versions
 
1
  ---
2
+ base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
3
  datasets:
4
  - generator
5
  library_name: peft
 
18
 
19
  # mistral_instruct_generation2
20
 
21
+ This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.1836
24
 
25
  ## Model description
26
 
 
40
 
41
  The following hyperparameters were used during training:
42
  - learning_rate: 2e-05
43
+ - train_batch_size: 4
44
  - eval_batch_size: 8
45
  - seed: 42
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
+ | 0.1924 | 1.0 | 373 | 0.2022 |
56
+ | 0.182 | 2.0 | 746 | 0.1896 |
57
+ | 0.1792 | 3.0 | 1119 | 0.1864 |
58
+ | 0.177 | 4.0 | 1492 | 0.1850 |
59
+ | 0.174 | 5.0 | 1865 | 0.1842 |
60
+ | 0.1723 | 6.0 | 2238 | 0.1836 |
61
 
62
 
63
  ### Framework versions
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -20,8 +20,8 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "v_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28872b811e1232a31edfa433f32ca907b507958cc861da28d98f5b3708dfcdaa
3
  size 109069176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09616b9b950d77576c2f681e0e1c1c5188954c34c6819d5f71cebd32f58eac1
3
  size 109069176
runs/Jul22_13-31-35_7fb9740056ca/events.out.tfevents.1721655106.7fb9740056ca.1301.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:260df9193956b31212a84509dfde6d52efbbc404806e30ae84579b7c662d631f
3
+ size 4184
runs/Jul22_13-32-32_7fb9740056ca/events.out.tfevents.1721655162.7fb9740056ca.1301.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:306544f26e9684f8326d2bbb552bdbadd7ab235e09ee262932feaa0bc112753e
3
+ size 4184
runs/Jul22_13-41-24_7fb9740056ca/events.out.tfevents.1721655700.7fb9740056ca.6628.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ca80cb6d044942810295a2e198dc1073a6749907d7111a172722a38715af783
3
+ size 54777
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9f33dbad024860b5bfc83c54564a9d1891fc7679a59a2b209595ece712cd8c7
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07cb9d177fb2cf00b868540442deb33510288c86e9bbc8be01d6c76645740563
3
  size 5368