yyx123 committed on
Commit
05dba02
1 Parent(s): 51dd54c

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,9 @@
2
  license: other
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - sft
9
  - generated_from_trainer
10
- datasets:
11
- - ruozhiba
12
  base_model: 01-ai/Yi-6B
13
  model-index:
14
  - name: Yi-6B-ruozhiba-1e-5-50
@@ -20,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # Yi-6B-ruozhiba-1e-5-50
22
 
23
- This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the ruozhiba dataset.
24
  It achieves the following results on the evaluation set:
25
  - Loss: 1.9875
26
 
@@ -70,7 +66,6 @@ The following hyperparameters were used during training:
70
  | 1.2944 | 17.0 | 935 | 1.9819 |
71
  | 1.3433 | 18.0 | 990 | 1.9856 |
72
  | 1.2058 | 19.0 | 1045 | 1.9871 |
73
- | 1.2904 | 20.0 | 1100 | 1.9875 |
74
 
75
 
76
  ### Framework versions
 
2
  license: other
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
 
 
8
  base_model: 01-ai/Yi-6B
9
  model-index:
10
  - name: Yi-6B-ruozhiba-1e-5-50
 
16
 
17
  # Yi-6B-ruozhiba-1e-5-50
18
 
19
+ This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 1.9875
22
 
 
66
  | 1.2944 | 17.0 | 935 | 1.9819 |
67
  | 1.3433 | 18.0 | 990 | 1.9856 |
68
  | 1.2058 | 19.0 | 1045 | 1.9871 |
 
69
 
70
 
71
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "k_proj",
23
- "up_proj",
24
  "v_proj",
25
- "q_proj",
26
- "o_proj",
27
  "down_proj",
28
- "gate_proj"
 
 
 
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "v_proj",
 
 
23
  "down_proj",
24
+ "gate_proj",
25
+ "o_proj",
26
+ "q_proj",
27
+ "up_proj",
28
+ "k_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.987522840499878,
4
- "eval_runtime": 4.9168,
5
  "eval_samples": 23,
6
- "eval_samples_per_second": 4.678,
7
- "eval_steps_per_second": 1.22,
8
- "train_loss": 1.2573659207604149,
9
- "train_runtime": 10567.2291,
10
  "train_samples": 217,
11
- "train_samples_per_second": 0.411,
12
- "train_steps_per_second": 0.104
13
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.987522840499878,
4
+ "eval_runtime": 6.7242,
5
  "eval_samples": 23,
6
+ "eval_samples_per_second": 3.42,
7
+ "eval_steps_per_second": 0.892,
8
+ "train_loss": 0.0,
9
+ "train_runtime": 12.2134,
10
  "train_samples": 217,
11
+ "train_samples_per_second": 355.347,
12
+ "train_steps_per_second": 90.065
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.987522840499878,
4
- "eval_runtime": 4.9168,
5
  "eval_samples": 23,
6
- "eval_samples_per_second": 4.678,
7
- "eval_steps_per_second": 1.22
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_loss": 1.987522840499878,
4
+ "eval_runtime": 6.7242,
5
  "eval_samples": 23,
6
+ "eval_samples_per_second": 3.42,
7
+ "eval_steps_per_second": 0.892
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 1.2573659207604149,
4
- "train_runtime": 10567.2291,
5
  "train_samples": 217,
6
- "train_samples_per_second": 0.411,
7
- "train_steps_per_second": 0.104
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.0,
4
+ "train_runtime": 12.2134,
5
  "train_samples": 217,
6
+ "train_samples_per_second": 355.347,
7
+ "train_steps_per_second": 90.065
8
  }
trainer_state.json CHANGED
@@ -1952,32 +1952,14 @@
1952
  "loss": 1.2904,
1953
  "step": 1100
1954
  },
1955
- {
1956
- "epoch": 20.0,
1957
- "gpt4_scores": 0.59,
1958
- "step": 1100
1959
- },
1960
- {
1961
- "epoch": 20.0,
1962
- "std": 0.130728726758888,
1963
- "step": 1100
1964
- },
1965
- {
1966
- "epoch": 20.0,
1967
- "eval_loss": 1.987522840499878,
1968
- "eval_runtime": 4.9557,
1969
- "eval_samples_per_second": 4.641,
1970
- "eval_steps_per_second": 1.211,
1971
- "step": 1100
1972
- },
1973
  {
1974
  "epoch": 20.0,
1975
  "step": 1100,
1976
  "total_flos": 3.792475205866291e+16,
1977
- "train_loss": 1.2573659207604149,
1978
- "train_runtime": 10567.2291,
1979
- "train_samples_per_second": 0.411,
1980
- "train_steps_per_second": 0.104
1981
  }
1982
  ],
1983
  "logging_steps": 4,
 
1952
  "loss": 1.2904,
1953
  "step": 1100
1954
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1955
  {
1956
  "epoch": 20.0,
1957
  "step": 1100,
1958
  "total_flos": 3.792475205866291e+16,
1959
+ "train_loss": 0.0,
1960
+ "train_runtime": 12.2134,
1961
+ "train_samples_per_second": 355.347,
1962
+ "train_steps_per_second": 90.065
1963
  }
1964
  ],
1965
  "logging_steps": 4,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d301d30a65b58888148eed672cc4fc9efbd661d4c4541f3fbabbbf9c3f39952
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baed945f93338ffc5ef6fb47b5b597deef8531e08138870207a901564f5b8efa
3
  size 4792