yyx123 committed on
Commit
5db4d09
1 Parent(s): fc33c37

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,9 @@
2
  license: other
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - sft
9
  - generated_from_trainer
10
- datasets:
11
- - ruozhiba
12
  base_model: 01-ai/Yi-6B
13
  model-index:
14
  - name: Yi-6B-ruozhiba-1e-5
@@ -20,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # Yi-6B-ruozhiba-1e-5
22
 
23
- This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the ruozhiba dataset.
24
  It achieves the following results on the evaluation set:
25
  - Loss: 1.9852
26
 
@@ -70,7 +66,6 @@ The following hyperparameters were used during training:
70
  | 1.3015 | 17.0 | 935 | 1.9796 |
71
  | 1.3456 | 18.0 | 990 | 1.9831 |
72
  | 1.2136 | 19.0 | 1045 | 1.9848 |
73
- | 1.302 | 20.0 | 1100 | 1.9852 |
74
 
75
 
76
  ### Framework versions
 
2
  license: other
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
 
 
8
  base_model: 01-ai/Yi-6B
9
  model-index:
10
  - name: Yi-6B-ruozhiba-1e-5
 
16
 
17
  # Yi-6B-ruozhiba-1e-5
18
 
19
+ This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 1.9852
22
 
 
66
  | 1.3015 | 17.0 | 935 | 1.9796 |
67
  | 1.3456 | 18.0 | 990 | 1.9831 |
68
  | 1.2136 | 19.0 | 1045 | 1.9848 |
 
69
 
70
 
71
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "q_proj",
23
- "k_proj",
24
  "v_proj",
25
- "down_proj",
26
  "up_proj",
27
- "o_proj",
28
- "gate_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "o_proj",
23
+ "gate_proj",
24
  "q_proj",
 
25
  "v_proj",
26
+ "k_proj",
27
  "up_proj",
28
+ "down_proj"
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.9851655960083008,
4
- "eval_runtime": 4.9249,
5
  "eval_samples": 23,
6
- "eval_samples_per_second": 4.67,
7
- "eval_steps_per_second": 1.218,
8
- "train_loss": 0.4608629340475256,
9
- "train_runtime": 2621.4029,
10
  "train_samples": 217,
11
- "train_samples_per_second": 1.656,
12
- "train_steps_per_second": 0.42
13
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.9851655960083008,
4
+ "eval_runtime": 6.5053,
5
  "eval_samples": 23,
6
+ "eval_samples_per_second": 3.536,
7
+ "eval_steps_per_second": 0.922,
8
+ "train_loss": 0.0,
9
+ "train_runtime": 10.5489,
10
  "train_samples": 217,
11
+ "train_samples_per_second": 411.417,
12
+ "train_steps_per_second": 104.276
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.9851655960083008,
4
- "eval_runtime": 4.9249,
5
  "eval_samples": 23,
6
- "eval_samples_per_second": 4.67,
7
- "eval_steps_per_second": 1.218
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.9851655960083008,
4
+ "eval_runtime": 6.5053,
5
  "eval_samples": 23,
6
+ "eval_samples_per_second": 3.536,
7
+ "eval_steps_per_second": 0.922
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.4608629340475256,
4
- "train_runtime": 2621.4029,
5
  "train_samples": 217,
6
- "train_samples_per_second": 1.656,
7
- "train_steps_per_second": 0.42
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.0,
4
+ "train_runtime": 10.5489,
5
  "train_samples": 217,
6
+ "train_samples_per_second": 411.417,
7
+ "train_steps_per_second": 104.276
8
  }
trainer_state.json CHANGED
@@ -1872,27 +1872,14 @@
1872
  "loss": 1.302,
1873
  "step": 1100
1874
  },
1875
- {
1876
- "epoch": 20.0,
1877
- "gpt4_scores": 0.5666666666666667,
1878
- "step": 1100
1879
- },
1880
- {
1881
- "epoch": 20.0,
1882
- "eval_loss": 1.9851655960083008,
1883
- "eval_runtime": 4.9615,
1884
- "eval_samples_per_second": 4.636,
1885
- "eval_steps_per_second": 1.209,
1886
- "step": 1100
1887
- },
1888
  {
1889
  "epoch": 20.0,
1890
  "step": 1100,
1891
  "total_flos": 3.76665795378217e+16,
1892
- "train_loss": 0.4608629340475256,
1893
- "train_runtime": 2621.4029,
1894
- "train_samples_per_second": 1.656,
1895
- "train_steps_per_second": 0.42
1896
  }
1897
  ],
1898
  "logging_steps": 4,
 
1872
  "loss": 1.302,
1873
  "step": 1100
1874
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
1875
  {
1876
  "epoch": 20.0,
1877
  "step": 1100,
1878
  "total_flos": 3.76665795378217e+16,
1879
+ "train_loss": 0.0,
1880
+ "train_runtime": 10.5489,
1881
+ "train_samples_per_second": 411.417,
1882
+ "train_steps_per_second": 104.276
1883
  }
1884
  ],
1885
  "logging_steps": 4,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e881c0023e35912658acadbed8b82b42ce8c9f483ec5f254d9ba8e488b487a05
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:681e944a530e8d053a857fadfa7e90c8d15b31e2d7d369565dd6b10cc1b3084a
3
  size 4792