vintage-lavender619 committed on
Commit
e46f449
·
verified ·
1 Parent(s): 5e3b302

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.928125,
4
- "eval_loss": 0.2525596618652344,
5
- "eval_runtime": 2.8992,
6
- "eval_samples_per_second": 110.376,
7
- "eval_steps_per_second": 3.449,
8
  "total_flos": 8.32925255860224e+17,
9
- "train_loss": 0.5647170758247375,
10
- "train_runtime": 482.2543,
11
- "train_samples_per_second": 53.084,
12
- "train_steps_per_second": 0.415
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.903125,
4
+ "eval_loss": 0.3030492663383484,
5
+ "eval_runtime": 2.7444,
6
+ "eval_samples_per_second": 116.602,
7
+ "eval_steps_per_second": 3.644,
8
  "total_flos": 8.32925255860224e+17,
9
+ "train_loss": 0.5333085978031158,
10
+ "train_runtime": 482.5391,
11
+ "train_samples_per_second": 53.053,
12
+ "train_steps_per_second": 0.414
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.928125,
4
- "eval_loss": 0.2525596618652344,
5
- "eval_runtime": 2.8992,
6
- "eval_samples_per_second": 110.376,
7
- "eval_steps_per_second": 3.449
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.903125,
4
+ "eval_loss": 0.3030492663383484,
5
+ "eval_runtime": 2.7444,
6
+ "eval_samples_per_second": 116.602,
7
+ "eval_steps_per_second": 3.644
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4492651dec1b75bb2a5a9d617869b31a321f7040a9d0a37ed442434c9b0c8d2d
3
  size 110356296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e961680588cf95e0d30b24da7074b2c0d77f56332cab62eacaed3af545bcb76
3
  size 110356296
runs/Jun10_09-55-09_4c61f7eac1f1/events.out.tfevents.1718013803.4c61f7eac1f1.793.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7ecccfb65b2e0794de96665027a112fb4b3187914712a60c06fe89715ed8326
3
+ size 411
runs/Jun10_10-03-54_4c61f7eac1f1/events.out.tfevents.1718013834.4c61f7eac1f1.793.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ce9aacc259b77eab5e8dc41106a5c6e90d9914466b10d5545ee07dc6ffb5cd
3
+ size 5910
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 8.32925255860224e+17,
4
- "train_loss": 0.5647170758247375,
5
- "train_runtime": 482.2543,
6
- "train_samples_per_second": 53.084,
7
- "train_steps_per_second": 0.415
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 8.32925255860224e+17,
4
+ "train_loss": 0.5333085978031158,
5
+ "train_runtime": 482.5391,
6
+ "train_samples_per_second": 53.053,
7
+ "train_steps_per_second": 0.414
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.928125,
3
- "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finalterm/checkpoint-200",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 200,
@@ -10,332 +10,332 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 2.6207761764526367,
14
  "learning_rate": 2.5e-05,
15
- "loss": 1.4133,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.378125,
21
- "eval_loss": 1.3333066701889038,
22
- "eval_runtime": 2.787,
23
- "eval_samples_per_second": 114.821,
24
- "eval_steps_per_second": 3.588,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 7.7114481925964355,
30
  "learning_rate": 5e-05,
31
- "loss": 1.2307,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.534375,
37
- "eval_loss": 1.0633662939071655,
38
- "eval_runtime": 2.7679,
39
- "eval_samples_per_second": 115.611,
40
- "eval_steps_per_second": 3.613,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 14.193839073181152,
46
  "learning_rate": 4.722222222222222e-05,
47
- "loss": 0.9297,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.625,
53
- "eval_loss": 0.8310818672180176,
54
- "eval_runtime": 2.7652,
55
- "eval_samples_per_second": 115.723,
56
- "eval_steps_per_second": 3.616,
57
  "step": 30
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 9.415961265563965,
62
  "learning_rate": 4.4444444444444447e-05,
63
- "loss": 0.7477,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.70625,
69
- "eval_loss": 0.6810880899429321,
70
- "eval_runtime": 2.7914,
71
- "eval_samples_per_second": 114.637,
72
- "eval_steps_per_second": 3.582,
73
  "step": 40
74
  },
75
  {
76
  "epoch": 5.0,
77
- "grad_norm": 7.488087177276611,
78
  "learning_rate": 4.166666666666667e-05,
79
- "loss": 0.6441,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_accuracy": 0.8125,
85
- "eval_loss": 0.45663365721702576,
86
- "eval_runtime": 2.8439,
87
- "eval_samples_per_second": 112.523,
88
- "eval_steps_per_second": 3.516,
89
  "step": 50
90
  },
91
  {
92
  "epoch": 6.0,
93
- "grad_norm": 7.239401817321777,
94
  "learning_rate": 3.888888888888889e-05,
95
- "loss": 0.5618,
96
  "step": 60
97
  },
98
  {
99
  "epoch": 6.0,
100
- "eval_accuracy": 0.85625,
101
- "eval_loss": 0.3988620340824127,
102
- "eval_runtime": 2.8019,
103
- "eval_samples_per_second": 114.209,
104
- "eval_steps_per_second": 3.569,
105
  "step": 60
106
  },
107
  {
108
  "epoch": 7.0,
109
- "grad_norm": 6.502441883087158,
110
  "learning_rate": 3.611111111111111e-05,
111
- "loss": 0.4774,
112
  "step": 70
113
  },
114
  {
115
  "epoch": 7.0,
116
  "eval_accuracy": 0.859375,
117
- "eval_loss": 0.3833409249782562,
118
- "eval_runtime": 2.7898,
119
- "eval_samples_per_second": 114.702,
120
- "eval_steps_per_second": 3.584,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 8.0,
125
- "grad_norm": 8.868508338928223,
126
  "learning_rate": 3.3333333333333335e-05,
127
- "loss": 0.5328,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 8.0,
132
- "eval_accuracy": 0.865625,
133
- "eval_loss": 0.36920028924942017,
134
- "eval_runtime": 2.7882,
135
- "eval_samples_per_second": 114.771,
136
- "eval_steps_per_second": 3.587,
137
  "step": 80
138
  },
139
  {
140
  "epoch": 9.0,
141
- "grad_norm": 5.555983066558838,
142
  "learning_rate": 3.055555555555556e-05,
143
- "loss": 0.4705,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 9.0,
148
- "eval_accuracy": 0.89375,
149
- "eval_loss": 0.2731629014015198,
150
- "eval_runtime": 2.7951,
151
- "eval_samples_per_second": 114.485,
152
- "eval_steps_per_second": 3.578,
153
  "step": 90
154
  },
155
  {
156
  "epoch": 10.0,
157
- "grad_norm": 6.331461429595947,
158
  "learning_rate": 2.777777777777778e-05,
159
- "loss": 0.4338,
160
  "step": 100
161
  },
162
  {
163
  "epoch": 10.0,
164
- "eval_accuracy": 0.9125,
165
- "eval_loss": 0.25906652212142944,
166
- "eval_runtime": 2.7785,
167
- "eval_samples_per_second": 115.172,
168
- "eval_steps_per_second": 3.599,
169
  "step": 100
170
  },
171
  {
172
  "epoch": 11.0,
173
- "grad_norm": 9.486348152160645,
174
  "learning_rate": 2.5e-05,
175
- "loss": 0.4548,
176
  "step": 110
177
  },
178
  {
179
  "epoch": 11.0,
180
- "eval_accuracy": 0.89375,
181
- "eval_loss": 0.3341856896877289,
182
- "eval_runtime": 2.7783,
183
- "eval_samples_per_second": 115.179,
184
- "eval_steps_per_second": 3.599,
185
  "step": 110
186
  },
187
  {
188
  "epoch": 12.0,
189
- "grad_norm": 5.805672645568848,
190
  "learning_rate": 2.2222222222222223e-05,
191
- "loss": 0.4091,
192
  "step": 120
193
  },
194
  {
195
  "epoch": 12.0,
196
- "eval_accuracy": 0.925,
197
- "eval_loss": 0.27951109409332275,
198
- "eval_runtime": 2.784,
199
- "eval_samples_per_second": 114.941,
200
- "eval_steps_per_second": 3.592,
201
  "step": 120
202
  },
203
  {
204
  "epoch": 13.0,
205
- "grad_norm": 6.052751541137695,
206
  "learning_rate": 1.9444444444444445e-05,
207
- "loss": 0.3895,
208
  "step": 130
209
  },
210
  {
211
  "epoch": 13.0,
212
- "eval_accuracy": 0.896875,
213
- "eval_loss": 0.2881876826286316,
214
- "eval_runtime": 2.7921,
215
- "eval_samples_per_second": 114.609,
216
- "eval_steps_per_second": 3.582,
217
  "step": 130
218
  },
219
  {
220
  "epoch": 14.0,
221
- "grad_norm": 5.795813083648682,
222
  "learning_rate": 1.6666666666666667e-05,
223
- "loss": 0.375,
224
  "step": 140
225
  },
226
  {
227
  "epoch": 14.0,
228
- "eval_accuracy": 0.925,
229
- "eval_loss": 0.250267893075943,
230
- "eval_runtime": 2.8168,
231
- "eval_samples_per_second": 113.606,
232
- "eval_steps_per_second": 3.55,
233
  "step": 140
234
  },
235
  {
236
  "epoch": 15.0,
237
- "grad_norm": 4.954117774963379,
238
  "learning_rate": 1.388888888888889e-05,
239
- "loss": 0.3914,
240
  "step": 150
241
  },
242
  {
243
  "epoch": 15.0,
244
- "eval_accuracy": 0.915625,
245
- "eval_loss": 0.27446645498275757,
246
- "eval_runtime": 2.8321,
247
- "eval_samples_per_second": 112.992,
248
- "eval_steps_per_second": 3.531,
249
  "step": 150
250
  },
251
  {
252
  "epoch": 16.0,
253
- "grad_norm": 5.904136657714844,
254
  "learning_rate": 1.1111111111111112e-05,
255
- "loss": 0.3757,
256
  "step": 160
257
  },
258
  {
259
  "epoch": 16.0,
260
- "eval_accuracy": 0.91875,
261
- "eval_loss": 0.25052276253700256,
262
- "eval_runtime": 2.8221,
263
- "eval_samples_per_second": 113.391,
264
- "eval_steps_per_second": 3.543,
265
  "step": 160
266
  },
267
  {
268
  "epoch": 17.0,
269
- "grad_norm": 7.244380474090576,
270
  "learning_rate": 8.333333333333334e-06,
271
- "loss": 0.3645,
272
  "step": 170
273
  },
274
  {
275
  "epoch": 17.0,
276
- "eval_accuracy": 0.925,
277
- "eval_loss": 0.2719380557537079,
278
- "eval_runtime": 2.8421,
279
- "eval_samples_per_second": 112.594,
280
- "eval_steps_per_second": 3.519,
281
  "step": 170
282
  },
283
  {
284
  "epoch": 18.0,
285
- "grad_norm": 4.3203582763671875,
286
  "learning_rate": 5.555555555555556e-06,
287
- "loss": 0.3735,
288
  "step": 180
289
  },
290
  {
291
  "epoch": 18.0,
292
- "eval_accuracy": 0.91875,
293
- "eval_loss": 0.2649078071117401,
294
- "eval_runtime": 3.045,
295
- "eval_samples_per_second": 105.092,
296
- "eval_steps_per_second": 3.284,
297
  "step": 180
298
  },
299
  {
300
  "epoch": 19.0,
301
- "grad_norm": 5.9344964027404785,
302
  "learning_rate": 2.777777777777778e-06,
303
- "loss": 0.3498,
304
  "step": 190
305
  },
306
  {
307
  "epoch": 19.0,
308
- "eval_accuracy": 0.925,
309
- "eval_loss": 0.2517745792865753,
310
- "eval_runtime": 2.8415,
311
- "eval_samples_per_second": 112.617,
312
- "eval_steps_per_second": 3.519,
313
  "step": 190
314
  },
315
  {
316
  "epoch": 20.0,
317
- "grad_norm": 3.646685838699341,
318
  "learning_rate": 0.0,
319
- "loss": 0.3693,
320
  "step": 200
321
  },
322
  {
323
  "epoch": 20.0,
324
- "eval_accuracy": 0.928125,
325
- "eval_loss": 0.2525596618652344,
326
- "eval_runtime": 2.8098,
327
- "eval_samples_per_second": 113.888,
328
- "eval_steps_per_second": 3.559,
329
  "step": 200
330
  },
331
  {
332
  "epoch": 20.0,
333
  "step": 200,
334
  "total_flos": 8.32925255860224e+17,
335
- "train_loss": 0.5647170758247375,
336
- "train_runtime": 482.2543,
337
- "train_samples_per_second": 53.084,
338
- "train_steps_per_second": 0.415
339
  }
340
  ],
341
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.903125,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finalterm/checkpoint-120",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 200,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.020592212677002,
14
  "learning_rate": 2.5e-05,
15
+ "loss": 1.3728,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.515625,
21
+ "eval_loss": 1.2643654346466064,
22
+ "eval_runtime": 2.8905,
23
+ "eval_samples_per_second": 110.709,
24
+ "eval_steps_per_second": 3.46,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 5.058677673339844,
30
  "learning_rate": 5e-05,
31
+ "loss": 1.1308,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.625,
37
+ "eval_loss": 0.8816311955451965,
38
+ "eval_runtime": 2.7866,
39
+ "eval_samples_per_second": 114.835,
40
+ "eval_steps_per_second": 3.589,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "grad_norm": 7.673556327819824,
46
  "learning_rate": 4.722222222222222e-05,
47
+ "loss": 0.8721,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.70625,
53
+ "eval_loss": 0.6829319000244141,
54
+ "eval_runtime": 2.7596,
55
+ "eval_samples_per_second": 115.959,
56
+ "eval_steps_per_second": 3.624,
57
  "step": 30
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 4.715519905090332,
62
  "learning_rate": 4.4444444444444447e-05,
63
+ "loss": 0.6919,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.80625,
69
+ "eval_loss": 0.5298391580581665,
70
+ "eval_runtime": 2.8001,
71
+ "eval_samples_per_second": 114.281,
72
+ "eval_steps_per_second": 3.571,
73
  "step": 40
74
  },
75
  {
76
  "epoch": 5.0,
77
+ "grad_norm": 7.957090377807617,
78
  "learning_rate": 4.166666666666667e-05,
79
+ "loss": 0.5876,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_accuracy": 0.86875,
85
+ "eval_loss": 0.4100002348423004,
86
+ "eval_runtime": 2.7806,
87
+ "eval_samples_per_second": 115.082,
88
+ "eval_steps_per_second": 3.596,
89
  "step": 50
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "grad_norm": 6.996800899505615,
94
  "learning_rate": 3.888888888888889e-05,
95
+ "loss": 0.5504,
96
  "step": 60
97
  },
98
  {
99
  "epoch": 6.0,
100
+ "eval_accuracy": 0.853125,
101
+ "eval_loss": 0.4152528643608093,
102
+ "eval_runtime": 2.9983,
103
+ "eval_samples_per_second": 106.727,
104
+ "eval_steps_per_second": 3.335,
105
  "step": 60
106
  },
107
  {
108
  "epoch": 7.0,
109
+ "grad_norm": 4.525465965270996,
110
  "learning_rate": 3.611111111111111e-05,
111
+ "loss": 0.459,
112
  "step": 70
113
  },
114
  {
115
  "epoch": 7.0,
116
  "eval_accuracy": 0.859375,
117
+ "eval_loss": 0.3827503025531769,
118
+ "eval_runtime": 2.7918,
119
+ "eval_samples_per_second": 114.62,
120
+ "eval_steps_per_second": 3.582,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 8.0,
125
+ "grad_norm": 4.351122856140137,
126
  "learning_rate": 3.3333333333333335e-05,
127
+ "loss": 0.4501,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 8.0,
132
+ "eval_accuracy": 0.8625,
133
+ "eval_loss": 0.39407286047935486,
134
+ "eval_runtime": 2.7588,
135
+ "eval_samples_per_second": 115.992,
136
+ "eval_steps_per_second": 3.625,
137
  "step": 80
138
  },
139
  {
140
  "epoch": 9.0,
141
+ "grad_norm": 5.307958602905273,
142
  "learning_rate": 3.055555555555556e-05,
143
+ "loss": 0.4312,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 9.0,
148
+ "eval_accuracy": 0.871875,
149
+ "eval_loss": 0.36500272154808044,
150
+ "eval_runtime": 2.7271,
151
+ "eval_samples_per_second": 117.34,
152
+ "eval_steps_per_second": 3.667,
153
  "step": 90
154
  },
155
  {
156
  "epoch": 10.0,
157
+ "grad_norm": 5.813844203948975,
158
  "learning_rate": 2.777777777777778e-05,
159
+ "loss": 0.4119,
160
  "step": 100
161
  },
162
  {
163
  "epoch": 10.0,
164
+ "eval_accuracy": 0.875,
165
+ "eval_loss": 0.3515123128890991,
166
+ "eval_runtime": 2.736,
167
+ "eval_samples_per_second": 116.961,
168
+ "eval_steps_per_second": 3.655,
169
  "step": 100
170
  },
171
  {
172
  "epoch": 11.0,
173
+ "grad_norm": 8.393081665039062,
174
  "learning_rate": 2.5e-05,
175
+ "loss": 0.4014,
176
  "step": 110
177
  },
178
  {
179
  "epoch": 11.0,
180
+ "eval_accuracy": 0.896875,
181
+ "eval_loss": 0.3110010325908661,
182
+ "eval_runtime": 2.7536,
183
+ "eval_samples_per_second": 116.212,
184
+ "eval_steps_per_second": 3.632,
185
  "step": 110
186
  },
187
  {
188
  "epoch": 12.0,
189
+ "grad_norm": 5.969035625457764,
190
  "learning_rate": 2.2222222222222223e-05,
191
+ "loss": 0.3896,
192
  "step": 120
193
  },
194
  {
195
  "epoch": 12.0,
196
+ "eval_accuracy": 0.903125,
197
+ "eval_loss": 0.3030492663383484,
198
+ "eval_runtime": 2.79,
199
+ "eval_samples_per_second": 114.693,
200
+ "eval_steps_per_second": 3.584,
201
  "step": 120
202
  },
203
  {
204
  "epoch": 13.0,
205
+ "grad_norm": 4.165198802947998,
206
  "learning_rate": 1.9444444444444445e-05,
207
+ "loss": 0.3822,
208
  "step": 130
209
  },
210
  {
211
  "epoch": 13.0,
212
+ "eval_accuracy": 0.88125,
213
+ "eval_loss": 0.34730494022369385,
214
+ "eval_runtime": 2.7802,
215
+ "eval_samples_per_second": 115.099,
216
+ "eval_steps_per_second": 3.597,
217
  "step": 130
218
  },
219
  {
220
  "epoch": 14.0,
221
+ "grad_norm": 5.396005153656006,
222
  "learning_rate": 1.6666666666666667e-05,
223
+ "loss": 0.3985,
224
  "step": 140
225
  },
226
  {
227
  "epoch": 14.0,
228
+ "eval_accuracy": 0.8875,
229
+ "eval_loss": 0.32879379391670227,
230
+ "eval_runtime": 2.7703,
231
+ "eval_samples_per_second": 115.511,
232
+ "eval_steps_per_second": 3.61,
233
  "step": 140
234
  },
235
  {
236
  "epoch": 15.0,
237
+ "grad_norm": 5.696004390716553,
238
  "learning_rate": 1.388888888888889e-05,
239
+ "loss": 0.3826,
240
  "step": 150
241
  },
242
  {
243
  "epoch": 15.0,
244
+ "eval_accuracy": 0.9,
245
+ "eval_loss": 0.2924533486366272,
246
+ "eval_runtime": 2.7303,
247
+ "eval_samples_per_second": 117.204,
248
+ "eval_steps_per_second": 3.663,
249
  "step": 150
250
  },
251
  {
252
  "epoch": 16.0,
253
+ "grad_norm": 7.218973636627197,
254
  "learning_rate": 1.1111111111111112e-05,
255
+ "loss": 0.3716,
256
  "step": 160
257
  },
258
  {
259
  "epoch": 16.0,
260
+ "eval_accuracy": 0.875,
261
+ "eval_loss": 0.36188262701034546,
262
+ "eval_runtime": 2.726,
263
+ "eval_samples_per_second": 117.39,
264
+ "eval_steps_per_second": 3.668,
265
  "step": 160
266
  },
267
  {
268
  "epoch": 17.0,
269
+ "grad_norm": 7.728688716888428,
270
  "learning_rate": 8.333333333333334e-06,
271
+ "loss": 0.365,
272
  "step": 170
273
  },
274
  {
275
  "epoch": 17.0,
276
+ "eval_accuracy": 0.9,
277
+ "eval_loss": 0.2941049039363861,
278
+ "eval_runtime": 2.7206,
279
+ "eval_samples_per_second": 117.621,
280
+ "eval_steps_per_second": 3.676,
281
  "step": 170
282
  },
283
  {
284
  "epoch": 18.0,
285
+ "grad_norm": 5.3787126541137695,
286
  "learning_rate": 5.555555555555556e-06,
287
+ "loss": 0.3379,
288
  "step": 180
289
  },
290
  {
291
  "epoch": 18.0,
292
+ "eval_accuracy": 0.884375,
293
+ "eval_loss": 0.32390105724334717,
294
+ "eval_runtime": 2.7433,
295
+ "eval_samples_per_second": 116.648,
296
+ "eval_steps_per_second": 3.645,
297
  "step": 180
298
  },
299
  {
300
  "epoch": 19.0,
301
+ "grad_norm": 7.723823070526123,
302
  "learning_rate": 2.777777777777778e-06,
303
+ "loss": 0.3365,
304
  "step": 190
305
  },
306
  {
307
  "epoch": 19.0,
308
+ "eval_accuracy": 0.890625,
309
+ "eval_loss": 0.32600170373916626,
310
+ "eval_runtime": 2.7308,
311
+ "eval_samples_per_second": 117.184,
312
+ "eval_steps_per_second": 3.662,
313
  "step": 190
314
  },
315
  {
316
  "epoch": 20.0,
317
+ "grad_norm": 3.916741132736206,
318
  "learning_rate": 0.0,
319
+ "loss": 0.3429,
320
  "step": 200
321
  },
322
  {
323
  "epoch": 20.0,
324
+ "eval_accuracy": 0.89375,
325
+ "eval_loss": 0.3095899522304535,
326
+ "eval_runtime": 2.7374,
327
+ "eval_samples_per_second": 116.898,
328
+ "eval_steps_per_second": 3.653,
329
  "step": 200
330
  },
331
  {
332
  "epoch": 20.0,
333
  "step": 200,
334
  "total_flos": 8.32925255860224e+17,
335
+ "train_loss": 0.5333085978031158,
336
+ "train_runtime": 482.5391,
337
+ "train_samples_per_second": 53.053,
338
+ "train_steps_per_second": 0.414
339
  }
340
  ],
341
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89704a8ffa8fa935c58374faffa61f4422a00f901c2baed996bf4daaf46ca61f
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:976498a472cd8225fd32cf833b48c1556b1af4b1f1385edd65a6ca2c0cf00704
3
  size 5176