JackWong0911 committed on
Commit
a3ed035
·
verified ·
1 Parent(s): 3f1f89d

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. test_results.json +5 -5
  3. trainer_state.json +104 -113
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.17,
3
- "eval_accuracy": 0.8987341772151899,
4
- "eval_loss": 0.492713063955307,
5
- "eval_runtime": 66.3108,
6
- "eval_samples_per_second": 2.383,
7
- "eval_steps_per_second": 2.383
8
  }
 
1
  {
2
  "epoch": 5.17,
3
+ "eval_accuracy": 0.9117647058823529,
4
+ "eval_loss": 0.37310147285461426,
5
+ "eval_runtime": 33.7759,
6
+ "eval_samples_per_second": 2.013,
7
+ "eval_steps_per_second": 2.013
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.17,
3
- "eval_accuracy": 0.8987341772151899,
4
- "eval_loss": 0.492713063955307,
5
- "eval_runtime": 66.3108,
6
- "eval_samples_per_second": 2.383,
7
- "eval_steps_per_second": 2.383
8
  }
 
1
  {
2
  "epoch": 5.17,
3
+ "eval_accuracy": 0.9117647058823529,
4
+ "eval_loss": 0.37310147285461426,
5
+ "eval_runtime": 33.7759,
6
+ "eval_samples_per_second": 2.013,
7
+ "eval_steps_per_second": 2.013
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9285714285714286,
3
  "best_model_checkpoint": "timesformer-base-finetuned-k400-finetuned-kinetic-subset-three-local-temporal-with-spatial/checkpoint-150",
4
  "epoch": 5.166666666666667,
5
  "eval_steps": 500,
@@ -10,302 +10,293 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.03,
13
- "grad_norm": 32.2842903137207,
14
  "learning_rate": 1.6666666666666667e-05,
15
- "loss": 1.6309,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.07,
20
- "grad_norm": 29.811689376831055,
21
  "learning_rate": 3.3333333333333335e-05,
22
- "loss": 1.7024,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.1,
27
- "grad_norm": 26.413127899169922,
28
  "learning_rate": 5e-05,
29
- "loss": 1.5057,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.13,
34
- "grad_norm": 17.640932083129883,
35
  "learning_rate": 4.814814814814815e-05,
36
- "loss": 0.8117,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.17,
41
- "grad_norm": 33.26347732543945,
42
  "learning_rate": 4.62962962962963e-05,
43
- "loss": 0.2937,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.17,
48
- "eval_accuracy": 0.8690476190476191,
49
- "eval_loss": 0.3884676396846771,
50
- "eval_runtime": 105.2382,
51
- "eval_samples_per_second": 1.596,
52
- "eval_steps_per_second": 1.596,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.03,
57
- "grad_norm": 0.8504588007926941,
58
  "learning_rate": 4.4444444444444447e-05,
59
- "loss": 0.4217,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.07,
64
- "grad_norm": 0.1911085844039917,
65
  "learning_rate": 4.259259259259259e-05,
66
- "loss": 0.2901,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.1,
71
- "grad_norm": 0.12057169526815414,
72
  "learning_rate": 4.074074074074074e-05,
73
- "loss": 0.0256,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.13,
78
- "grad_norm": 0.2651258111000061,
79
  "learning_rate": 3.888888888888889e-05,
80
- "loss": 0.0627,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 1.17,
85
- "grad_norm": 0.1569805145263672,
86
  "learning_rate": 3.7037037037037037e-05,
87
- "loss": 0.2365,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.17,
92
- "eval_accuracy": 0.8809523809523809,
93
- "eval_loss": 0.600234866142273,
94
- "eval_runtime": 109.7974,
95
- "eval_samples_per_second": 1.53,
96
- "eval_steps_per_second": 1.53,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.03,
101
- "grad_norm": 0.038823891431093216,
102
  "learning_rate": 3.518518518518519e-05,
103
- "loss": 0.0043,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.07,
108
- "grad_norm": 0.037996720522642136,
109
  "learning_rate": 3.3333333333333335e-05,
110
- "loss": 0.0062,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.1,
115
- "grad_norm": 0.05053324997425079,
116
  "learning_rate": 3.148148148148148e-05,
117
- "loss": 0.0297,
118
  "step": 130
119
  },
120
  {
121
  "epoch": 2.13,
122
- "grad_norm": 1.2805042266845703,
123
  "learning_rate": 2.962962962962963e-05,
124
- "loss": 0.0046,
125
  "step": 140
126
  },
127
  {
128
  "epoch": 2.17,
129
- "grad_norm": 0.19171175360679626,
130
  "learning_rate": 2.777777777777778e-05,
131
- "loss": 0.0164,
132
  "step": 150
133
  },
134
  {
135
  "epoch": 2.17,
136
- "eval_accuracy": 0.9285714285714286,
137
- "eval_loss": 0.2311260849237442,
138
- "eval_runtime": 108.5839,
139
- "eval_samples_per_second": 1.547,
140
- "eval_steps_per_second": 1.547,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.03,
145
- "grad_norm": 0.025192299857735634,
146
  "learning_rate": 2.5925925925925925e-05,
147
- "loss": 0.1078,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 3.07,
152
- "grad_norm": 0.02108108066022396,
153
  "learning_rate": 2.4074074074074074e-05,
154
- "loss": 0.6885,
155
  "step": 170
156
  },
157
  {
158
  "epoch": 3.1,
159
- "grad_norm": 0.008117140270769596,
160
  "learning_rate": 2.2222222222222223e-05,
161
- "loss": 0.0011,
162
  "step": 180
163
  },
164
  {
165
  "epoch": 3.13,
166
- "grad_norm": 0.0680527612566948,
167
  "learning_rate": 2.037037037037037e-05,
168
- "loss": 0.0015,
169
  "step": 190
170
  },
171
  {
172
  "epoch": 3.17,
173
- "grad_norm": 0.011410138569772243,
174
  "learning_rate": 1.8518518518518518e-05,
175
- "loss": 0.4057,
176
  "step": 200
177
  },
178
  {
179
  "epoch": 3.17,
180
- "eval_accuracy": 0.8630952380952381,
181
- "eval_loss": 0.5045905113220215,
182
- "eval_runtime": 109.1813,
183
- "eval_samples_per_second": 1.539,
184
- "eval_steps_per_second": 1.539,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.03,
189
- "grad_norm": 0.015822872519493103,
190
  "learning_rate": 1.6666666666666667e-05,
191
- "loss": 0.0012,
192
  "step": 210
193
  },
194
  {
195
  "epoch": 4.07,
196
- "grad_norm": 0.06866960972547531,
197
  "learning_rate": 1.4814814814814815e-05,
198
- "loss": 0.0008,
199
  "step": 220
200
  },
201
  {
202
  "epoch": 4.1,
203
- "grad_norm": 0.025955336168408394,
204
  "learning_rate": 1.2962962962962962e-05,
205
- "loss": 0.0014,
206
  "step": 230
207
  },
208
  {
209
  "epoch": 4.13,
210
- "grad_norm": 0.027819635346531868,
211
  "learning_rate": 1.1111111111111112e-05,
212
- "loss": 0.034,
213
  "step": 240
214
  },
215
  {
216
  "epoch": 4.17,
217
- "grad_norm": 0.011835525743663311,
218
  "learning_rate": 9.259259259259259e-06,
219
- "loss": 0.0004,
220
  "step": 250
221
  },
222
  {
223
  "epoch": 4.17,
224
- "eval_accuracy": 0.875,
225
- "eval_loss": 0.4492361545562744,
226
- "eval_runtime": 108.7199,
227
- "eval_samples_per_second": 1.545,
228
- "eval_steps_per_second": 1.545,
229
  "step": 250
230
  },
231
  {
232
  "epoch": 5.03,
233
- "grad_norm": 0.9948074817657471,
234
  "learning_rate": 7.4074074074074075e-06,
235
- "loss": 0.0022,
236
  "step": 260
237
  },
238
  {
239
  "epoch": 5.07,
240
- "grad_norm": 0.07322775572538376,
241
  "learning_rate": 5.555555555555556e-06,
242
- "loss": 0.0011,
243
  "step": 270
244
  },
245
  {
246
  "epoch": 5.1,
247
- "grad_norm": 0.01132030040025711,
248
  "learning_rate": 3.7037037037037037e-06,
249
- "loss": 0.0146,
250
  "step": 280
251
  },
252
  {
253
  "epoch": 5.13,
254
- "grad_norm": 48.047698974609375,
255
  "learning_rate": 1.8518518518518519e-06,
256
  "loss": 0.0825,
257
  "step": 290
258
  },
259
  {
260
  "epoch": 5.17,
261
- "grad_norm": 0.02245236746966839,
262
  "learning_rate": 0.0,
263
- "loss": 0.0007,
264
  "step": 300
265
  },
266
  {
267
  "epoch": 5.17,
268
- "eval_accuracy": 0.8928571428571429,
269
- "eval_loss": 0.35668858885765076,
270
- "eval_runtime": 108.8318,
271
- "eval_samples_per_second": 1.544,
272
- "eval_steps_per_second": 1.544,
273
  "step": 300
274
  },
275
  {
276
  "epoch": 5.17,
277
  "step": 300,
278
  "total_flos": 3.28565653420032e+17,
279
- "train_loss": 0.2795241907471791,
280
- "train_runtime": 838.6117,
281
- "train_samples_per_second": 0.358,
282
- "train_steps_per_second": 0.358
283
- },
284
- {
285
- "epoch": 5.17,
286
- "eval_accuracy": 0.8987341772151899,
287
- "eval_loss": 0.49271300435066223,
288
- "eval_runtime": 66.5373,
289
- "eval_samples_per_second": 2.375,
290
- "eval_steps_per_second": 2.375,
291
- "step": 300
292
  },
293
  {
294
  "epoch": 5.17,
295
- "eval_accuracy": 0.8987341772151899,
296
- "eval_loss": 0.492713063955307,
297
- "eval_runtime": 65.6092,
298
- "eval_samples_per_second": 2.408,
299
- "eval_steps_per_second": 2.408,
300
  "step": 300
301
  },
302
  {
303
  "epoch": 5.17,
304
- "eval_accuracy": 0.8987341772151899,
305
- "eval_loss": 0.492713063955307,
306
- "eval_runtime": 66.3108,
307
- "eval_samples_per_second": 2.383,
308
- "eval_steps_per_second": 2.383,
309
  "step": 300
310
  }
311
  ],
 
1
  {
2
+ "best_metric": 0.9583333333333334,
3
  "best_model_checkpoint": "timesformer-base-finetuned-k400-finetuned-kinetic-subset-three-local-temporal-with-spatial/checkpoint-150",
4
  "epoch": 5.166666666666667,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.03,
13
+ "grad_norm": 0.01975257694721222,
14
  "learning_rate": 1.6666666666666667e-05,
15
+ "loss": 0.0009,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.07,
20
+ "grad_norm": 0.014412298798561096,
21
  "learning_rate": 3.3333333333333335e-05,
22
+ "loss": 0.0005,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.1,
27
+ "grad_norm": 0.048455920070409775,
28
  "learning_rate": 5e-05,
29
+ "loss": 0.0018,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.13,
34
+ "grad_norm": 0.003363919910043478,
35
  "learning_rate": 4.814814814814815e-05,
36
+ "loss": 0.0012,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.17,
41
+ "grad_norm": 0.024853387847542763,
42
  "learning_rate": 4.62962962962963e-05,
43
+ "loss": 0.0003,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.17,
48
+ "eval_accuracy": 0.8472222222222222,
49
+ "eval_loss": 0.4552870988845825,
50
+ "eval_runtime": 58.6213,
51
+ "eval_samples_per_second": 1.228,
52
+ "eval_steps_per_second": 1.228,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.03,
57
+ "grad_norm": 0.004102244041860104,
58
  "learning_rate": 4.4444444444444447e-05,
59
+ "loss": 0.3192,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.07,
64
+ "grad_norm": 0.003750218078494072,
65
  "learning_rate": 4.259259259259259e-05,
66
+ "loss": 0.0002,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.1,
71
+ "grad_norm": 0.002901990432292223,
72
  "learning_rate": 4.074074074074074e-05,
73
+ "loss": 0.5754,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.13,
78
+ "grad_norm": 0.019022488966584206,
79
  "learning_rate": 3.888888888888889e-05,
80
+ "loss": 0.0002,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 1.17,
85
+ "grad_norm": 0.004384249914437532,
86
  "learning_rate": 3.7037037037037037e-05,
87
+ "loss": 0.0004,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.17,
92
+ "eval_accuracy": 0.9027777777777778,
93
+ "eval_loss": 0.33492106199264526,
94
+ "eval_runtime": 58.3208,
95
+ "eval_samples_per_second": 1.235,
96
+ "eval_steps_per_second": 1.235,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.03,
101
+ "grad_norm": 0.008162049576640129,
102
  "learning_rate": 3.518518518518519e-05,
103
+ "loss": 0.0003,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.07,
108
+ "grad_norm": 0.0084863705560565,
109
  "learning_rate": 3.3333333333333335e-05,
110
+ "loss": 0.0002,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.1,
115
+ "grad_norm": 0.005603179335594177,
116
  "learning_rate": 3.148148148148148e-05,
117
+ "loss": 0.0004,
118
  "step": 130
119
  },
120
  {
121
  "epoch": 2.13,
122
+ "grad_norm": 0.035682689398527145,
123
  "learning_rate": 2.962962962962963e-05,
124
+ "loss": 0.0003,
125
  "step": 140
126
  },
127
  {
128
  "epoch": 2.17,
129
+ "grad_norm": 0.07813193649053574,
130
  "learning_rate": 2.777777777777778e-05,
131
+ "loss": 0.0004,
132
  "step": 150
133
  },
134
  {
135
  "epoch": 2.17,
136
+ "eval_accuracy": 0.9583333333333334,
137
+ "eval_loss": 0.12790486216545105,
138
+ "eval_runtime": 58.0641,
139
+ "eval_samples_per_second": 1.24,
140
+ "eval_steps_per_second": 1.24,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.03,
145
+ "grad_norm": 0.0019370431546121836,
146
  "learning_rate": 2.5925925925925925e-05,
147
+ "loss": 0.0003,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 3.07,
152
+ "grad_norm": 0.0020284331403672695,
153
  "learning_rate": 2.4074074074074074e-05,
154
+ "loss": 0.5874,
155
  "step": 170
156
  },
157
  {
158
  "epoch": 3.1,
159
+ "grad_norm": 0.0013429196551442146,
160
  "learning_rate": 2.2222222222222223e-05,
161
+ "loss": 0.019,
162
  "step": 180
163
  },
164
  {
165
  "epoch": 3.13,
166
+ "grad_norm": 3.899311065673828,
167
  "learning_rate": 2.037037037037037e-05,
168
+ "loss": 0.0037,
169
  "step": 190
170
  },
171
  {
172
  "epoch": 3.17,
173
+ "grad_norm": 0.001104432623833418,
174
  "learning_rate": 1.8518518518518518e-05,
175
+ "loss": 0.0995,
176
  "step": 200
177
  },
178
  {
179
  "epoch": 3.17,
180
+ "eval_accuracy": 0.9305555555555556,
181
+ "eval_loss": 0.1180504709482193,
182
+ "eval_runtime": 57.5974,
183
+ "eval_samples_per_second": 1.25,
184
+ "eval_steps_per_second": 1.25,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.03,
189
+ "grad_norm": 0.003288655774667859,
190
  "learning_rate": 1.6666666666666667e-05,
191
+ "loss": 0.0003,
192
  "step": 210
193
  },
194
  {
195
  "epoch": 4.07,
196
+ "grad_norm": 0.018307456746697426,
197
  "learning_rate": 1.4814814814814815e-05,
198
+ "loss": 0.0002,
199
  "step": 220
200
  },
201
  {
202
  "epoch": 4.1,
203
+ "grad_norm": 0.012207794934511185,
204
  "learning_rate": 1.2962962962962962e-05,
205
+ "loss": 0.0004,
206
  "step": 230
207
  },
208
  {
209
  "epoch": 4.13,
210
+ "grad_norm": 0.001230477704666555,
211
  "learning_rate": 1.1111111111111112e-05,
212
+ "loss": 0.0007,
213
  "step": 240
214
  },
215
  {
216
  "epoch": 4.17,
217
+ "grad_norm": 0.001645821612328291,
218
  "learning_rate": 9.259259259259259e-06,
219
+ "loss": 0.0001,
220
  "step": 250
221
  },
222
  {
223
  "epoch": 4.17,
224
+ "eval_accuracy": 0.9166666666666666,
225
+ "eval_loss": 0.17973226308822632,
226
+ "eval_runtime": 57.4339,
227
+ "eval_samples_per_second": 1.254,
228
+ "eval_steps_per_second": 1.254,
229
  "step": 250
230
  },
231
  {
232
  "epoch": 5.03,
233
+ "grad_norm": 0.04015364125370979,
234
  "learning_rate": 7.4074074074074075e-06,
235
+ "loss": 0.0002,
236
  "step": 260
237
  },
238
  {
239
  "epoch": 5.07,
240
+ "grad_norm": 0.10828184336423874,
241
  "learning_rate": 5.555555555555556e-06,
242
+ "loss": 0.0002,
243
  "step": 270
244
  },
245
  {
246
  "epoch": 5.1,
247
+ "grad_norm": 0.0032853526063263416,
248
  "learning_rate": 3.7037037037037037e-06,
249
+ "loss": 0.0006,
250
  "step": 280
251
  },
252
  {
253
  "epoch": 5.13,
254
+ "grad_norm": 0.005327914375811815,
255
  "learning_rate": 1.8518518518518519e-06,
256
  "loss": 0.0825,
257
  "step": 290
258
  },
259
  {
260
  "epoch": 5.17,
261
+ "grad_norm": 0.004417418502271175,
262
  "learning_rate": 0.0,
263
+ "loss": 0.0003,
264
  "step": 300
265
  },
266
  {
267
  "epoch": 5.17,
268
+ "eval_accuracy": 0.9444444444444444,
269
+ "eval_loss": 0.13933943212032318,
270
+ "eval_runtime": 56.7608,
271
+ "eval_samples_per_second": 1.268,
272
+ "eval_steps_per_second": 1.268,
273
  "step": 300
274
  },
275
  {
276
  "epoch": 5.17,
277
  "step": 300,
278
  "total_flos": 3.28565653420032e+17,
279
+ "train_loss": 0.056568971110973504,
280
+ "train_runtime": 562.6502,
281
+ "train_samples_per_second": 0.533,
282
+ "train_steps_per_second": 0.533
 
 
 
 
 
 
 
 
 
283
  },
284
  {
285
  "epoch": 5.17,
286
+ "eval_accuracy": 0.9117647058823529,
287
+ "eval_loss": 0.37310144305229187,
288
+ "eval_runtime": 34.9776,
289
+ "eval_samples_per_second": 1.944,
290
+ "eval_steps_per_second": 1.944,
291
  "step": 300
292
  },
293
  {
294
  "epoch": 5.17,
295
+ "eval_accuracy": 0.9117647058823529,
296
+ "eval_loss": 0.37310147285461426,
297
+ "eval_runtime": 33.7759,
298
+ "eval_samples_per_second": 2.013,
299
+ "eval_steps_per_second": 2.013,
300
  "step": 300
301
  }
302
  ],