yyx123 committed on
Commit 1bf5f8f
1 Parent(s): e7c76c6

Model save

README.md CHANGED
@@ -2,13 +2,9 @@
 license: other
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
-datasets:
-- ruozhiba
 base_model: 01-ai/Yi-6B
 model-index:
 - name: Yi-6B-ruozhiba
@@ -20,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # Yi-6B-ruozhiba
 
-This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the ruozhiba dataset.
+This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.2134
+- Loss: 1.9288
 
 ## Model description
 
@@ -54,11 +50,11 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.948 | 1.0 | 217 | 1.9488 |
-| 1.7781 | 2.0 | 434 | 1.9393 |
-| 1.4563 | 3.0 | 651 | 2.0187 |
-| 1.3206 | 4.0 | 868 | 2.1767 |
-| 1.1018 | 5.0 | 1085 | 2.2134 |
+| 2.2958 | 1.0 | 55 | 1.9636 |
+| 1.8837 | 2.0 | 110 | 1.9193 |
+| 1.7174 | 3.0 | 165 | 1.9123 |
+| 1.6515 | 4.0 | 220 | 1.9246 |
+| 1.6337 | 5.0 | 275 | 1.9288 |
 
 
 ### Framework versions
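
The card metadata above (`library_name: peft`, `base_model: 01-ai/Yi-6B`) indicates this commit stores a PEFT adapter rather than full model weights. A minimal loading sketch, assuming the adapter lives in a repo named `yyx123/Yi-6B-ruozhiba` (inferred from the committer and model name, not stated in the diff):

```python
# Minimal sketch: attach the adapter from this commit to the 01-ai/Yi-6B base model.
# The adapter repo id below is an assumption inferred from committer + model name.
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_repo = "yyx123/Yi-6B-ruozhiba"  # hypothetical repo id

# AutoPeftModelForCausalLM reads adapter_config.json, fetches the base model it
# points at (01-ai/Yi-6B) and loads adapter_model.safetensors on top of it.
model = AutoPeftModelForCausalLM.from_pretrained(adapter_repo, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("01-ai/Yi-6B")

inputs = tokenizer("Hello, ", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```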
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6085ec1693fc53c95aa3dd4c6d6c929835ec9eb23fe8336c9ae1c5bfbee8f76a
+oid sha256:486b40eefed28fbfe0cd8a26cbe038d7910ed6a56e8b4a8821c14505d364ce55
 size 72673912
all_results.json CHANGED
@@ -1,13 +1,13 @@
 {
     "epoch": 5.0,
-    "eval_loss": 2.2133874893188477,
-    "eval_runtime": 1.2237,
+    "eval_loss": 1.9287786483764648,
+    "eval_runtime": 1.2455,
     "eval_samples": 23,
-    "eval_samples_per_second": 18.796,
-    "eval_steps_per_second": 4.903,
-    "train_loss": 0.005994410009428104,
-    "train_runtime": 16.2081,
+    "eval_samples_per_second": 18.466,
+    "eval_steps_per_second": 4.817,
+    "train_loss": 1.8268643422560258,
+    "train_runtime": 238.6826,
     "train_samples": 217,
-    "train_samples_per_second": 66.942,
-    "train_steps_per_second": 66.942
+    "train_samples_per_second": 4.546,
+    "train_steps_per_second": 1.152
 }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
-    "eval_loss": 2.2133874893188477,
-    "eval_runtime": 1.2237,
+    "eval_loss": 1.9287786483764648,
+    "eval_runtime": 1.2455,
     "eval_samples": 23,
-    "eval_samples_per_second": 18.796,
-    "eval_steps_per_second": 4.903
+    "eval_samples_per_second": 18.466,
+    "eval_steps_per_second": 4.817
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
-    "train_loss": 0.005994410009428104,
-    "train_runtime": 16.2081,
+    "train_loss": 1.8268643422560258,
+    "train_runtime": 238.6826,
     "train_samples": 217,
-    "train_samples_per_second": 66.942,
-    "train_steps_per_second": 66.942
+    "train_samples_per_second": 4.546,
+    "train_steps_per_second": 1.152
 }
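
The updated throughput figures are internally consistent with the run size recorded elsewhere in this commit (275 optimizer steps over 5 epochs of 217 training samples). A quick check, assuming those numbers:

```python
# Consistency check of the reported throughput (values taken from this commit).
train_runtime = 238.6826   # seconds
max_steps = 275            # optimizer steps (from trainer_state.json)
train_samples = 217        # samples per epoch
num_train_epochs = 5

print(round(max_steps / train_runtime, 3))                          # ~1.152 train_steps_per_second
print(round(num_train_epochs * train_samples / train_runtime, 3))   # ~4.546 train_samples_per_second
```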
trainer_state.json CHANGED
@@ -3,236 +3,152 @@
   "best_model_checkpoint": null,
   "epoch": 5.0,
   "eval_steps": 500,
-  "global_step": 1085,
+  "global_step": 275,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0,
-      "learning_rate": 4.587155963302753e-07,
-      "loss": 2.9047,
+      "epoch": 0.02,
+      "learning_rate": 1.7857142857142857e-06,
+      "loss": 2.5611,
       "step": 1
     },
     {
-      "epoch": 0.18,
-      "learning_rate": 1.834862385321101e-05,
-      "loss": 2.6449,
-      "step": 40
-    },
-    {
-      "epoch": 0.37,
-      "learning_rate": 3.669724770642202e-05,
-      "loss": 2.2297,
-      "step": 80
+      "epoch": 0.36,
+      "learning_rate": 3.571428571428572e-05,
+      "loss": 2.5326,
+      "step": 20
     },
     {
-      "epoch": 0.55,
-      "learning_rate": 4.998433068104634e-05,
-      "loss": 2.0343,
-      "step": 120
-    },
-    {
-      "epoch": 0.74,
-      "learning_rate": 4.9663895022434335e-05,
-      "loss": 1.9288,
-      "step": 160
-    },
-    {
-      "epoch": 0.92,
-      "learning_rate": 4.893515717147499e-05,
-      "loss": 1.948,
-      "step": 200
+      "epoch": 0.73,
+      "learning_rate": 4.970937357800635e-05,
+      "loss": 2.2958,
+      "step": 40
     },
     {
       "epoch": 1.0,
-      "eval_loss": 1.9487614631652832,
-      "eval_runtime": 2.7263,
-      "eval_samples_per_second": 8.436,
-      "eval_steps_per_second": 8.436,
-      "step": 217
-    },
-    {
-      "epoch": 1.11,
-      "learning_rate": 4.7810181129491795e-05,
-      "loss": 1.9046,
-      "step": 240
-    },
-    {
-      "epoch": 1.29,
-      "learning_rate": 4.630759048306189e-05,
-      "loss": 1.8033,
-      "step": 280
+      "eval_loss": 1.9636107683181763,
+      "eval_runtime": 1.2207,
+      "eval_samples_per_second": 18.842,
+      "eval_steps_per_second": 4.915,
+      "step": 55
     },
     {
-      "epoch": 1.47,
-      "learning_rate": 4.4452260097026376e-05,
-      "loss": 1.7401,
-      "step": 320
+      "epoch": 1.09,
+      "learning_rate": 4.7957734321334915e-05,
+      "loss": 1.9545,
+      "step": 60
     },
     {
-      "epoch": 1.66,
-      "learning_rate": 4.227490431976606e-05,
-      "loss": 1.8471,
-      "step": 360
+      "epoch": 1.45,
+      "learning_rate": 4.4728512734909844e-05,
+      "loss": 1.8763,
+      "step": 80
     },
     {
-      "epoch": 1.84,
-      "learning_rate": 3.981156851786102e-05,
-      "loss": 1.7781,
-      "step": 400
+      "epoch": 1.82,
+      "learning_rate": 4.022954469173914e-05,
+      "loss": 1.8837,
+      "step": 100
     },
     {
       "epoch": 2.0,
-      "eval_loss": 1.9392595291137695,
-      "eval_runtime": 2.7235,
-      "eval_samples_per_second": 8.445,
-      "eval_steps_per_second": 8.445,
-      "step": 434
+      "eval_loss": 1.9193000793457031,
+      "eval_runtime": 1.2166,
+      "eval_samples_per_second": 18.905,
+      "eval_steps_per_second": 4.932,
+      "step": 110
     },
     {
-      "epoch": 2.03,
-      "learning_rate": 3.710303235760038e-05,
-      "loss": 1.6503,
-      "step": 440
-    },
-    {
-      "epoch": 2.21,
-      "learning_rate": 3.41941347118094e-05,
-      "loss": 1.5325,
-      "step": 480
-    },
-    {
-      "epoch": 2.4,
-      "learning_rate": 3.113303136792597e-05,
-      "loss": 1.4801,
-      "step": 520
-    },
-    {
-      "epoch": 2.58,
-      "learning_rate": 2.7970397825710876e-05,
-      "loss": 1.5216,
-      "step": 560
+      "epoch": 2.18,
+      "learning_rate": 3.475038819391789e-05,
+      "loss": 1.7375,
+      "step": 120
     },
     {
-      "epoch": 2.76,
-      "learning_rate": 2.4758590381998137e-05,
-      "loss": 1.426,
-      "step": 600
+      "epoch": 2.55,
+      "learning_rate": 2.864368713521274e-05,
+      "loss": 1.7612,
+      "step": 140
     },
     {
-      "epoch": 2.95,
-      "learning_rate": 2.1550779390435147e-05,
-      "loss": 1.4563,
-      "step": 640
+      "epoch": 2.91,
+      "learning_rate": 2.2302474793696117e-05,
+      "loss": 1.7174,
+      "step": 160
     },
     {
       "epoch": 3.0,
-      "eval_loss": 2.018707036972046,
-      "eval_runtime": 2.7309,
-      "eval_samples_per_second": 8.422,
-      "eval_steps_per_second": 8.422,
-      "step": 651
-    },
-    {
-      "epoch": 3.13,
-      "learning_rate": 1.840006904479584e-05,
-      "loss": 1.2741,
-      "step": 680
-    },
-    {
-      "epoch": 3.32,
-      "learning_rate": 1.5358618257547464e-05,
-      "loss": 1.1991,
-      "step": 720
-    },
-    {
-      "epoch": 3.5,
-      "learning_rate": 1.2476777187220119e-05,
-      "loss": 1.2377,
-      "step": 760
+      "eval_loss": 1.9123048782348633,
+      "eval_runtime": 1.2199,
+      "eval_samples_per_second": 18.853,
+      "eval_steps_per_second": 4.918,
+      "step": 165
     },
     {
-      "epoch": 3.69,
-      "learning_rate": 9.802253709067949e-06,
-      "loss": 1.2592,
-      "step": 800
+      "epoch": 3.27,
+      "learning_rate": 1.613487782393661e-05,
+      "loss": 1.6455,
+      "step": 180
     },
     {
-      "epoch": 3.87,
-      "learning_rate": 7.3793236278095755e-06,
-      "loss": 1.3206,
-      "step": 840
+      "epoch": 3.64,
+      "learning_rate": 1.0537848824653418e-05,
+      "loss": 1.6657,
+      "step": 200
     },
     {
       "epoch": 4.0,
-      "eval_loss": 2.17669677734375,
-      "eval_runtime": 2.7315,
-      "eval_samples_per_second": 8.42,
-      "eval_steps_per_second": 8.42,
-      "step": 868
-    },
-    {
-      "epoch": 4.06,
-      "learning_rate": 5.248097707101035e-06,
-      "loss": 1.1614,
-      "step": 880
-    },
-    {
-      "epoch": 4.24,
-      "learning_rate": 3.443857649812915e-06,
-      "loss": 1.1929,
-      "step": 920
-    },
-    {
-      "epoch": 4.42,
-      "learning_rate": 1.9964720217269558e-06,
-      "loss": 1.0971,
-      "step": 960
+      "learning_rate": 5.871618079580327e-06,
+      "loss": 1.6515,
+      "step": 220
     },
     {
-      "epoch": 4.61,
-      "learning_rate": 9.299017878319383e-07,
-      "loss": 1.091,
-      "step": 1000
+      "epoch": 4.0,
+      "eval_loss": 1.9245940446853638,
+      "eval_runtime": 1.269,
+      "eval_samples_per_second": 18.125,
+      "eval_steps_per_second": 4.728,
+      "step": 220
     },
     {
-      "epoch": 4.79,
-      "learning_rate": 2.6180364689323554e-07,
-      "loss": 1.1938,
-      "step": 1040
+      "epoch": 4.36,
+      "learning_rate": 2.4365087825904355e-06,
+      "loss": 1.5988,
+      "step": 240
     },
     {
-      "epoch": 4.98,
-      "learning_rate": 3.237730954069873e-09,
-      "loss": 1.1018,
-      "step": 1080
+      "epoch": 4.73,
+      "learning_rate": 4.536079419643374e-07,
+      "loss": 1.6337,
+      "step": 260
     },
     {
       "epoch": 5.0,
-      "eval_loss": 2.2133874893188477,
-      "eval_runtime": 1.2338,
-      "eval_samples_per_second": 18.641,
-      "eval_steps_per_second": 4.863,
-      "step": 1085
+      "eval_loss": 1.9287786483764648,
+      "eval_runtime": 1.2179,
+      "eval_samples_per_second": 18.886,
+      "eval_steps_per_second": 4.927,
+      "step": 275
     },
     {
       "epoch": 5.0,
-      "step": 1085,
-      "total_flos": 6960819918102528.0,
-      "train_loss": 0.005994410009428104,
-      "train_runtime": 16.2081,
-      "train_samples_per_second": 66.942,
-      "train_steps_per_second": 66.942
+      "step": 275,
+      "total_flos": 9506378566434816.0,
+      "train_loss": 1.8268643422560258,
+      "train_runtime": 238.6826,
+      "train_samples_per_second": 4.546,
+      "train_steps_per_second": 1.152
     }
   ],
-  "logging_steps": 40,
-  "max_steps": 1085,
+  "logging_steps": 20,
+  "max_steps": 275,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
-  "total_flos": 6960819918102528.0,
-  "train_batch_size": 1,
+  "total_flos": 9506378566434816.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }
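
The `log_history` entries above follow the usual `transformers` `TrainerState` layout: optimizer-step records carry `loss` and `learning_rate`, while per-epoch evaluation records carry `eval_loss` and throughput fields. A minimal sketch for pulling the per-epoch validation losses back out of a locally downloaded `trainer_state.json` (the file path is illustrative):

```python
import json

# Extract the per-epoch eval losses logged in trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

for record in state["log_history"]:
    if "eval_loss" in record:  # evaluation records; training records carry "loss" instead
        print(f'epoch {record["epoch"]:.1f}  step {record["step"]:>4}  eval_loss {record["eval_loss"]:.4f}')
```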