kevinzyz committed on
Commit 0590763 · 1 Parent(s): 4239025

Training in progress, epoch 1

all_results.json CHANGED
@@ -1,15 +1,15 @@
  {
- "epoch": 2.0,
- "eval_accuracy": 0.30000001192092896,
- "eval_loss": 1.5800185203552246,
- "eval_runtime": 0.4521,
  "eval_samples": 500,
- "eval_samples_per_second": 1105.973,
- "eval_steps_per_second": 35.391,
- "total_flos": 4691397715200.0,
- "train_loss": 1.5075831718444823,
- "train_runtime": 20.08,
  "train_samples": 4000,
- "train_samples_per_second": 398.407,
- "train_steps_per_second": 12.45
  }
 
  {
+ "epoch": 10.0,
+ "eval_accuracy": 0.27000001072883606,
+ "eval_loss": 1.60148286819458,
+ "eval_runtime": 0.3331,
  "eval_samples": 500,
+ "eval_samples_per_second": 1501.132,
+ "eval_steps_per_second": 24.018,
+ "total_flos": 22540141814400.0,
+ "train_loss": 1.6052362956697979,
+ "train_runtime": 55.5553,
  "train_samples": 4000,
+ "train_samples_per_second": 720.003,
+ "train_steps_per_second": 11.34
  }
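
Note: all_results.json, eval_results.json and train_results.json are the metric summaries the Hugging Face `Trainer` writes via `trainer.save_metrics()`. A minimal sketch for inspecting the updated summary, assuming a local clone of this repo so the relative path resolves:

```python
import json
from pathlib import Path

# Read the combined metrics file committed above (path assumes a local clone).
results = json.loads(Path("all_results.json").read_text())

print(f"epochs trained : {results['epoch']}")              # 10.0 after this commit
print(f"eval accuracy  : {results['eval_accuracy']:.3f}")   # ~0.270
print(f"eval loss      : {results['eval_loss']:.4f}")       # ~1.6015
print(f"train loss     : {results['train_loss']:.4f}")      # ~1.6052
```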
eval_results.json CHANGED
@@ -1,9 +1,9 @@
  {
- "epoch": 2.0,
- "eval_accuracy": 0.30000001192092896,
- "eval_loss": 1.5800185203552246,
- "eval_runtime": 0.4521,
  "eval_samples": 500,
- "eval_samples_per_second": 1105.973,
- "eval_steps_per_second": 35.391
  }
 
  {
+ "epoch": 10.0,
+ "eval_accuracy": 0.27000001072883606,
+ "eval_loss": 1.60148286819458,
+ "eval_runtime": 0.3331,
  "eval_samples": 500,
+ "eval_samples_per_second": 1501.132,
+ "eval_steps_per_second": 24.018
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:861a2ac60d31fb772fdc1d1e44c482f5eded7a8bb03efefd3663a3e8ad12ca42
  size 12755881
 
  version https://git-lfs.github.com/spec/v1
+ oid sha256:e0096f1a45fa31f275d0de57dd202bc75ce557882075f303de9bcaf314c72d4e
  size 12755881
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
- "epoch": 2.0,
- "total_flos": 4691397715200.0,
- "train_loss": 1.5075831718444823,
- "train_runtime": 20.08,
  "train_samples": 4000,
- "train_samples_per_second": 398.407,
- "train_steps_per_second": 12.45
  }
 
  {
+ "epoch": 10.0,
+ "total_flos": 22540141814400.0,
+ "train_loss": 1.6052362956697979,
+ "train_runtime": 55.5553,
  "train_samples": 4000,
+ "train_samples_per_second": 720.003,
+ "train_steps_per_second": 11.34
  }
trainer_state.json CHANGED
@@ -1,115 +1,301 @@
  {
- "best_metric": 1.5800185203552246,
- "best_model_checkpoint": "chinese_roberta_L-2_H-128-finetuned-MC-hyper/checkpoint-125",
- "epoch": 2.0,
- "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.16,
- "learning_rate": 0.0001608,
- "loss": 1.5282,
  "step": 20
  },
  {
- "epoch": 0.32,
- "learning_rate": 0.0001368,
- "loss": 1.5563,
  "step": 40
  },
  {
- "epoch": 0.48,
- "learning_rate": 0.00011279999999999999,
- "loss": 1.5631,
  "step": 60
  },
  {
- "epoch": 0.64,
- "learning_rate": 8.879999999999999e-05,
- "loss": 1.5201,
  "step": 80
  },
  {
- "epoch": 0.8,
- "learning_rate": 6.479999999999999e-05,
- "loss": 1.526,
  "step": 100
  },
  {
- "epoch": 0.96,
- "learning_rate": 4.08e-05,
- "loss": 1.5107,
  "step": 120
  },
  {
- "epoch": 1.0,
- "eval_accuracy": 0.30000001192092896,
- "eval_loss": 1.5800185203552246,
- "eval_runtime": 0.4321,
- "eval_samples_per_second": 1157.093,
- "eval_steps_per_second": 37.027,
- "step": 125
  },
  {
- "epoch": 1.12,
- "learning_rate": 1.68e-05,
- "loss": 1.4737,
  "step": 140
  },
  {
- "epoch": 1.28,
- "learning_rate": 0.0,
- "loss": 1.4647,
  "step": 160
  },
  {
- "epoch": 1.44,
- "learning_rate": 0.0,
- "loss": 1.4731,
  "step": 180
  },
  {
- "epoch": 1.6,
- "learning_rate": 0.0,
- "loss": 1.4969,
  "step": 200
  },
  {
- "epoch": 1.76,
- "learning_rate": 0.0,
- "loss": 1.4998,
  "step": 220
  },
  {
- "epoch": 1.92,
- "learning_rate": 0.0,
- "loss": 1.495,
  "step": 240
  },
  {
- "epoch": 2.0,
- "eval_accuracy": 0.2939999997615814,
- "eval_loss": 1.5831648111343384,
- "eval_runtime": 0.4307,
- "eval_samples_per_second": 1160.788,
- "eval_steps_per_second": 37.145,
- "step": 250
  },
  {
- "epoch": 2.0,
- "step": 250,
- "total_flos": 4691397715200.0,
- "train_loss": 1.5075831718444823,
- "train_runtime": 20.08,
- "train_samples_per_second": 398.407,
- "train_steps_per_second": 12.45
  }
  ],
- "max_steps": 250,
- "num_train_epochs": 2,
- "total_flos": 4691397715200.0,
  "trial_name": null,
  "trial_params": null
  }
 
  {
+ "best_metric": 1.60148286819458,
+ "best_model_checkpoint": "chinese_roberta_L-2_H-128-finetuned-MC-hyper/checkpoint-630",
+ "epoch": 10.0,
+ "global_step": 630,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
+ "epoch": 0.32,
+ "learning_rate": 9.682539682539683e-06,
+ "loss": 1.6084,
  "step": 20
  },
  {
+ "epoch": 0.63,
+ "learning_rate": 9.365079365079366e-06,
+ "loss": 1.6085,
  "step": 40
  },
  {
+ "epoch": 0.95,
+ "learning_rate": 9.047619047619049e-06,
+ "loss": 1.6093,
  "step": 60
  },
  {
+ "epoch": 1.0,
+ "eval_accuracy": 0.27799999713897705,
+ "eval_loss": 1.6079264879226685,
+ "eval_runtime": 0.3306,
+ "eval_samples_per_second": 1512.408,
+ "eval_steps_per_second": 24.199,
+ "step": 63
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 8.730158730158731e-06,
+ "loss": 1.6092,
  "step": 80
  },
  {
+ "epoch": 1.59,
+ "learning_rate": 8.412698412698414e-06,
+ "loss": 1.6084,
  "step": 100
  },
  {
+ "epoch": 1.9,
+ "learning_rate": 8.095238095238097e-06,
+ "loss": 1.6083,
  "step": 120
  },
  {
+ "epoch": 2.0,
+ "eval_accuracy": 0.27799999713897705,
+ "eval_loss": 1.6071099042892456,
+ "eval_runtime": 0.3325,
+ "eval_samples_per_second": 1503.597,
+ "eval_steps_per_second": 24.058,
+ "step": 126
  },
  {
+ "epoch": 2.22,
+ "learning_rate": 7.77777777777778e-06,
+ "loss": 1.6073,
  "step": 140
  },
  {
+ "epoch": 2.54,
+ "learning_rate": 7.460317460317461e-06,
+ "loss": 1.6081,
  "step": 160
  },
  {
+ "epoch": 2.86,
+ "learning_rate": 7.1428571428571436e-06,
+ "loss": 1.6077,
  "step": 180
  },
  {
+ "epoch": 3.0,
+ "eval_accuracy": 0.28600001335144043,
+ "eval_loss": 1.6063222885131836,
+ "eval_runtime": 0.3267,
+ "eval_samples_per_second": 1530.439,
+ "eval_steps_per_second": 24.487,
+ "step": 189
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 6.825396825396826e-06,
+ "loss": 1.6061,
  "step": 200
  },
  {
+ "epoch": 3.49,
+ "learning_rate": 6.507936507936509e-06,
+ "loss": 1.6081,
  "step": 220
  },
  {
+ "epoch": 3.81,
+ "learning_rate": 6.1904761904761914e-06,
+ "loss": 1.6078,
  "step": 240
  },
  {
+ "epoch": 4.0,
+ "eval_accuracy": 0.2919999957084656,
+ "eval_loss": 1.605468511581421,
+ "eval_runtime": 0.391,
+ "eval_samples_per_second": 1278.612,
+ "eval_steps_per_second": 20.458,
+ "step": 252
  },
  {
+ "epoch": 4.13,
+ "learning_rate": 5.873015873015874e-06,
+ "loss": 1.606,
+ "step": 260
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 5.555555555555557e-06,
+ "loss": 1.6059,
+ "step": 280
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 5.2380952380952384e-06,
+ "loss": 1.6047,
+ "step": 300
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.28200000524520874,
+ "eval_loss": 1.6045148372650146,
+ "eval_runtime": 0.3348,
+ "eval_samples_per_second": 1493.284,
+ "eval_steps_per_second": 23.893,
+ "step": 315
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 4.920634920634921e-06,
+ "loss": 1.6059,
+ "step": 320
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 4.603174603174604e-06,
+ "loss": 1.6057,
+ "step": 340
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 4.2857142857142855e-06,
+ "loss": 1.6042,
+ "step": 360
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.27799999713897705,
+ "eval_loss": 1.60362708568573,
+ "eval_runtime": 0.3262,
+ "eval_samples_per_second": 1532.953,
+ "eval_steps_per_second": 24.527,
+ "step": 378
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 3.968253968253968e-06,
+ "loss": 1.6031,
+ "step": 380
+ },
+ {
+ "epoch": 6.35,
+ "learning_rate": 3.6507936507936507e-06,
+ "loss": 1.6043,
+ "step": 400
+ },
+ {
+ "epoch": 6.67,
+ "learning_rate": 3.3333333333333333e-06,
+ "loss": 1.604,
+ "step": 420
+ },
+ {
+ "epoch": 6.98,
+ "learning_rate": 3.015873015873016e-06,
+ "loss": 1.6029,
+ "step": 440
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.27399998903274536,
+ "eval_loss": 1.6027634143829346,
+ "eval_runtime": 0.3276,
+ "eval_samples_per_second": 1526.145,
+ "eval_steps_per_second": 24.418,
+ "step": 441
+ },
+ {
+ "epoch": 7.3,
+ "learning_rate": 2.6984126984126986e-06,
+ "loss": 1.6022,
+ "step": 460
+ },
+ {
+ "epoch": 7.62,
+ "learning_rate": 2.380952380952381e-06,
+ "loss": 1.6041,
+ "step": 480
+ },
+ {
+ "epoch": 7.94,
+ "learning_rate": 2.0634920634920634e-06,
+ "loss": 1.6007,
+ "step": 500
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.27000001072883606,
+ "eval_loss": 1.6020437479019165,
+ "eval_runtime": 0.3322,
+ "eval_samples_per_second": 1505.137,
+ "eval_steps_per_second": 24.082,
+ "step": 504
+ },
+ {
+ "epoch": 8.25,
+ "learning_rate": 1.746031746031746e-06,
+ "loss": 1.6023,
+ "step": 520
+ },
+ {
+ "epoch": 8.57,
+ "learning_rate": 1.4285714285714286e-06,
+ "loss": 1.6026,
+ "step": 540
+ },
+ {
+ "epoch": 8.89,
+ "learning_rate": 1.111111111111111e-06,
+ "loss": 1.6015,
+ "step": 560
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.27000001072883606,
+ "eval_loss": 1.6016141176223755,
+ "eval_runtime": 0.3269,
+ "eval_samples_per_second": 1529.374,
+ "eval_steps_per_second": 24.47,
+ "step": 567
+ },
+ {
+ "epoch": 9.21,
+ "learning_rate": 7.936507936507937e-07,
+ "loss": 1.6016,
+ "step": 580
+ },
+ {
+ "epoch": 9.52,
+ "learning_rate": 4.7619047619047623e-07,
+ "loss": 1.6024,
+ "step": 600
+ },
+ {
+ "epoch": 9.84,
+ "learning_rate": 1.5873015873015874e-07,
+ "loss": 1.6017,
+ "step": 620
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.27000001072883606,
+ "eval_loss": 1.60148286819458,
+ "eval_runtime": 0.3257,
+ "eval_samples_per_second": 1535.263,
+ "eval_steps_per_second": 24.564,
+ "step": 630
+ },
+ {
+ "epoch": 10.0,
+ "step": 630,
+ "total_flos": 22540141814400.0,
+ "train_loss": 1.6052362956697979,
+ "train_runtime": 55.5553,
+ "train_samples_per_second": 720.003,
+ "train_steps_per_second": 11.34
  }
  ],
+ "max_steps": 630,
+ "num_train_epochs": 10,
+ "total_flos": 22540141814400.0,
  "trial_name": null,
  "trial_params": null
  }
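
The log_history array in the new trainer_state.json mixes training log entries (loss and learning_rate every 20 steps) with one evaluation entry at the end of each epoch (eval_loss, eval_accuracy). A short sketch for pulling out the per-epoch eval curve, again assuming a local clone so the path resolves:

```python
import json
from pathlib import Path

state = json.loads(Path("trainer_state.json").read_text())

# Evaluation records are the entries that carry "eval_loss";
# training log records carry "loss"/"learning_rate" instead.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"epoch {entry['epoch']:>4}  step {entry['step']:>3}  "
              f"eval_loss {entry['eval_loss']:.4f}  "
              f"eval_accuracy {entry['eval_accuracy']:.3f}")

print("best metric (eval_loss):", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])
```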
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6fe78e4f8bd344224f4450af0bb1d8064659984ccaf89355e358f14a9800bafe
  size 2799
 
  version https://git-lfs.github.com/spec/v1
+ oid sha256:369f79da83f13b5b29eb52d0370d4338999b9bc2fbbcd98395648ddfc7a6d687
  size 2799
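
training_args.bin is a pickled TrainingArguments object rather than model weights, so the only change visible here is the LFS pointer's sha256. A hedged sketch for seeing which hyperparameters differ between the two runs (assumes torch and transformers are installed and the file comes from a trusted source):

```python
import torch

# The file is a pickled Python object, not a tensor checkpoint, so recent
# PyTorch (>= 1.13) needs weights_only=False; only do this for trusted files.
args = torch.load("training_args.bin", weights_only=False)

print(args.num_train_epochs)            # 10 for the run in this commit
print(args.learning_rate)
print(args.per_device_train_batch_size)
```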