tyzhu committed on
Commit
347967c
·
verified ·
1 Parent(s): d1f55a6

End of training

Browse files
Files changed (6) hide show
  1. README.md +14 -2
  2. all_results.json +16 -0
  3. eval_results.json +10 -0
  4. tokenizer.json +1 -6
  5. train_results.json +9 -0
  6. trainer_state.json +246 -0
README.md CHANGED
@@ -3,11 +3,23 @@ license: other
3
  base_model: Qwen/Qwen1.5-4B
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: lmind_nq_train6000_eval6489_v1_qa_Qwen_Qwen1.5-4B_lora2
10
- results: []
 
 
 
 
 
 
 
 
 
 
11
  library_name: peft
12
  ---
13
 
@@ -16,7 +28,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # lmind_nq_train6000_eval6489_v1_qa_Qwen_Qwen1.5-4B_lora2
18
 
19
- This model is a fine-tuned version of [Qwen/Qwen1.5-4B](https://huggingface.co/Qwen/Qwen1.5-4B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 2.2527
22
  - Accuracy: 0.5594
 
3
  base_model: Qwen/Qwen1.5-4B
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - tyzhu/lmind_nq_train6000_eval6489_v1_qa
8
  metrics:
9
  - accuracy
10
  model-index:
11
  - name: lmind_nq_train6000_eval6489_v1_qa_Qwen_Qwen1.5-4B_lora2
12
+ results:
13
+ - task:
14
+ name: Causal Language Modeling
15
+ type: text-generation
16
+ dataset:
17
+ name: tyzhu/lmind_nq_train6000_eval6489_v1_qa
18
+ type: tyzhu/lmind_nq_train6000_eval6489_v1_qa
19
+ metrics:
20
+ - name: Accuracy
21
+ type: accuracy
22
+ value: 0.5594358974358974
23
  library_name: peft
24
  ---
25
 
 
28
 
29
  # lmind_nq_train6000_eval6489_v1_qa_Qwen_Qwen1.5-4B_lora2
30
 
31
+ This model is a fine-tuned version of [Qwen/Qwen1.5-4B](https://huggingface.co/Qwen/Qwen1.5-4B) on the tyzhu/lmind_nq_train6000_eval6489_v1_qa dataset.
32
  It achieves the following results on the evaluation set:
33
  - Loss: 2.2527
34
  - Accuracy: 0.5594
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.973333333333333,
3
+ "eval_accuracy": 0.5594358974358974,
4
+ "eval_loss": 2.252700090408325,
5
+ "eval_runtime": 7.2866,
6
+ "eval_samples": 500,
7
+ "eval_samples_per_second": 68.619,
8
+ "eval_steps_per_second": 8.646,
9
+ "perplexity": 9.51338819714881,
10
+ "total_flos": 3.256692486491341e+16,
11
+ "train_loss": 0.933582401785621,
12
+ "train_runtime": 6553.864,
13
+ "train_samples": 6000,
14
+ "train_samples_per_second": 9.155,
15
+ "train_steps_per_second": 0.285
16
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.973333333333333,
3
+ "eval_accuracy": 0.5594358974358974,
4
+ "eval_loss": 2.252700090408325,
5
+ "eval_runtime": 7.2866,
6
+ "eval_samples": 500,
7
+ "eval_samples_per_second": 68.619,
8
+ "eval_steps_per_second": 8.646,
9
+ "perplexity": 9.51338819714881
10
+ }
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 1024,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.973333333333333,
3
+ "total_flos": 3.256692486491341e+16,
4
+ "train_loss": 0.933582401785621,
5
+ "train_runtime": 6553.864,
6
+ "train_samples": 6000,
7
+ "train_samples_per_second": 9.155,
8
+ "train_steps_per_second": 0.285
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.973333333333333,
5
+ "eval_steps": 500,
6
+ "global_step": 1870,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5333333333333333,
13
+ "grad_norm": 0.5919559597969055,
14
+ "learning_rate": 0.0001,
15
+ "loss": 1.7657,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.9973333333333333,
20
+ "eval_accuracy": 0.5738461538461539,
21
+ "eval_loss": 1.6214678287506104,
22
+ "eval_runtime": 6.7752,
23
+ "eval_samples_per_second": 73.799,
24
+ "eval_steps_per_second": 9.299,
25
+ "step": 187
26
+ },
27
+ {
28
+ "epoch": 1.0666666666666667,
29
+ "grad_norm": 0.7285414934158325,
30
+ "learning_rate": 0.0001,
31
+ "loss": 1.6421,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 1.6,
36
+ "grad_norm": 1.0658913850784302,
37
+ "learning_rate": 0.0001,
38
+ "loss": 1.497,
39
+ "step": 300
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_accuracy": 0.5742051282051283,
44
+ "eval_loss": 1.6179509162902832,
45
+ "eval_runtime": 7.0059,
46
+ "eval_samples_per_second": 71.369,
47
+ "eval_steps_per_second": 8.992,
48
+ "step": 375
49
+ },
50
+ {
51
+ "epoch": 2.1333333333333333,
52
+ "grad_norm": 1.2808786630630493,
53
+ "learning_rate": 0.0001,
54
+ "loss": 1.4367,
55
+ "step": 400
56
+ },
57
+ {
58
+ "epoch": 2.6666666666666665,
59
+ "grad_norm": 1.369543194770813,
60
+ "learning_rate": 0.0001,
61
+ "loss": 1.2345,
62
+ "step": 500
63
+ },
64
+ {
65
+ "epoch": 2.997333333333333,
66
+ "eval_accuracy": 0.5712820512820512,
67
+ "eval_loss": 1.6951237916946411,
68
+ "eval_runtime": 7.4766,
69
+ "eval_samples_per_second": 66.875,
70
+ "eval_steps_per_second": 8.426,
71
+ "step": 562
72
+ },
73
+ {
74
+ "epoch": 3.2,
75
+ "grad_norm": 1.4907397031784058,
76
+ "learning_rate": 0.0001,
77
+ "loss": 1.157,
78
+ "step": 600
79
+ },
80
+ {
81
+ "epoch": 3.7333333333333334,
82
+ "grad_norm": 1.5697628259658813,
83
+ "learning_rate": 0.0001,
84
+ "loss": 1.0084,
85
+ "step": 700
86
+ },
87
+ {
88
+ "epoch": 4.0,
89
+ "eval_accuracy": 0.5658974358974359,
90
+ "eval_loss": 1.8059020042419434,
91
+ "eval_runtime": 7.0878,
92
+ "eval_samples_per_second": 70.544,
93
+ "eval_steps_per_second": 8.889,
94
+ "step": 750
95
+ },
96
+ {
97
+ "epoch": 4.266666666666667,
98
+ "grad_norm": 1.6422948837280273,
99
+ "learning_rate": 0.0001,
100
+ "loss": 0.9221,
101
+ "step": 800
102
+ },
103
+ {
104
+ "epoch": 4.8,
105
+ "grad_norm": 1.6770026683807373,
106
+ "learning_rate": 0.0001,
107
+ "loss": 0.8397,
108
+ "step": 900
109
+ },
110
+ {
111
+ "epoch": 4.997333333333334,
112
+ "eval_accuracy": 0.5646666666666667,
113
+ "eval_loss": 1.9245407581329346,
114
+ "eval_runtime": 7.4653,
115
+ "eval_samples_per_second": 66.977,
116
+ "eval_steps_per_second": 8.439,
117
+ "step": 937
118
+ },
119
+ {
120
+ "epoch": 5.333333333333333,
121
+ "grad_norm": 1.588730812072754,
122
+ "learning_rate": 0.0001,
123
+ "loss": 0.7486,
124
+ "step": 1000
125
+ },
126
+ {
127
+ "epoch": 5.866666666666667,
128
+ "grad_norm": 1.8069469928741455,
129
+ "learning_rate": 0.0001,
130
+ "loss": 0.7186,
131
+ "step": 1100
132
+ },
133
+ {
134
+ "epoch": 6.0,
135
+ "eval_accuracy": 0.5614358974358974,
136
+ "eval_loss": 2.0345287322998047,
137
+ "eval_runtime": 6.285,
138
+ "eval_samples_per_second": 79.554,
139
+ "eval_steps_per_second": 10.024,
140
+ "step": 1125
141
+ },
142
+ {
143
+ "epoch": 6.4,
144
+ "grad_norm": 1.590628981590271,
145
+ "learning_rate": 0.0001,
146
+ "loss": 0.6356,
147
+ "step": 1200
148
+ },
149
+ {
150
+ "epoch": 6.933333333333334,
151
+ "grad_norm": 1.9079328775405884,
152
+ "learning_rate": 0.0001,
153
+ "loss": 0.6421,
154
+ "step": 1300
155
+ },
156
+ {
157
+ "epoch": 6.997333333333334,
158
+ "eval_accuracy": 0.5607692307692308,
159
+ "eval_loss": 2.1147842407226562,
160
+ "eval_runtime": 8.0598,
161
+ "eval_samples_per_second": 62.037,
162
+ "eval_steps_per_second": 7.817,
163
+ "step": 1312
164
+ },
165
+ {
166
+ "epoch": 7.466666666666667,
167
+ "grad_norm": 1.3450945615768433,
168
+ "learning_rate": 0.0001,
169
+ "loss": 0.5814,
170
+ "step": 1400
171
+ },
172
+ {
173
+ "epoch": 8.0,
174
+ "grad_norm": 1.2447816133499146,
175
+ "learning_rate": 0.0001,
176
+ "loss": 0.5968,
177
+ "step": 1500
178
+ },
179
+ {
180
+ "epoch": 8.0,
181
+ "eval_accuracy": 0.5584615384615385,
182
+ "eval_loss": 2.1779356002807617,
183
+ "eval_runtime": 7.4067,
184
+ "eval_samples_per_second": 67.506,
185
+ "eval_steps_per_second": 8.506,
186
+ "step": 1500
187
+ },
188
+ {
189
+ "epoch": 8.533333333333333,
190
+ "grad_norm": 1.113315463066101,
191
+ "learning_rate": 0.0001,
192
+ "loss": 0.5417,
193
+ "step": 1600
194
+ },
195
+ {
196
+ "epoch": 8.997333333333334,
197
+ "eval_accuracy": 0.5567692307692308,
198
+ "eval_loss": 2.265408754348755,
199
+ "eval_runtime": 6.4222,
200
+ "eval_samples_per_second": 77.854,
201
+ "eval_steps_per_second": 9.81,
202
+ "step": 1687
203
+ },
204
+ {
205
+ "epoch": 9.066666666666666,
206
+ "grad_norm": 2.0787792205810547,
207
+ "learning_rate": 0.0001,
208
+ "loss": 0.5678,
209
+ "step": 1700
210
+ },
211
+ {
212
+ "epoch": 9.6,
213
+ "grad_norm": 1.027565598487854,
214
+ "learning_rate": 0.0001,
215
+ "loss": 0.5356,
216
+ "step": 1800
217
+ },
218
+ {
219
+ "epoch": 9.973333333333333,
220
+ "eval_accuracy": 0.5594358974358974,
221
+ "eval_loss": 2.252700090408325,
222
+ "eval_runtime": 7.737,
223
+ "eval_samples_per_second": 64.624,
224
+ "eval_steps_per_second": 8.143,
225
+ "step": 1870
226
+ },
227
+ {
228
+ "epoch": 9.973333333333333,
229
+ "step": 1870,
230
+ "total_flos": 3.256692486491341e+16,
231
+ "train_loss": 0.933582401785621,
232
+ "train_runtime": 6553.864,
233
+ "train_samples_per_second": 9.155,
234
+ "train_steps_per_second": 0.285
235
+ }
236
+ ],
237
+ "logging_steps": 100,
238
+ "max_steps": 1870,
239
+ "num_input_tokens_seen": 0,
240
+ "num_train_epochs": 10,
241
+ "save_steps": 500,
242
+ "total_flos": 3.256692486491341e+16,
243
+ "train_batch_size": 1,
244
+ "trial_name": null,
245
+ "trial_params": null
246
+ }