anirudhmu commited on
Commit
1d57fb3
·
1 Parent(s): 3629b0d

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. test_results.json +8 -0
  3. trainer_state.json +538 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 31.03,
3
+ "eval_accuracy": 0.9470198675496688,
4
+ "eval_loss": 0.2553568482398987,
5
+ "eval_runtime": 31.773,
6
+ "eval_samples_per_second": 4.752,
7
+ "eval_steps_per_second": 4.752
8
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 31.03,
3
+ "eval_accuracy": 0.9470198675496688,
4
+ "eval_loss": 0.2553568482398987,
5
+ "eval_runtime": 31.773,
6
+ "eval_samples_per_second": 4.752,
7
+ "eval_steps_per_second": 4.752
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9586206896551724,
3
+ "best_model_checkpoint": "videomae-base-finetuned-soccer-action-recognition/checkpoint-1790",
4
+ "epoch": 31.03125,
5
+ "eval_steps": 500,
6
+ "global_step": 2728,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 1.4652014652014653e-05,
14
+ "loss": 1.7115,
15
+ "step": 80
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "eval_accuracy": 0.4,
20
+ "eval_loss": 1.4195746183395386,
21
+ "eval_runtime": 150.0417,
22
+ "eval_samples_per_second": 0.966,
23
+ "eval_steps_per_second": 0.966,
24
+ "step": 85
25
+ },
26
+ {
27
+ "epoch": 1.03,
28
+ "learning_rate": 2.9304029304029305e-05,
29
+ "loss": 1.0097,
30
+ "step": 160
31
+ },
32
+ {
33
+ "epoch": 1.03,
34
+ "eval_accuracy": 0.6758620689655173,
35
+ "eval_loss": 0.7807376980781555,
36
+ "eval_runtime": 30.3768,
37
+ "eval_samples_per_second": 4.773,
38
+ "eval_steps_per_second": 4.773,
39
+ "step": 170
40
+ },
41
+ {
42
+ "epoch": 2.03,
43
+ "learning_rate": 4.3956043956043955e-05,
44
+ "loss": 0.6192,
45
+ "step": 240
46
+ },
47
+ {
48
+ "epoch": 2.03,
49
+ "eval_accuracy": 0.7034482758620689,
50
+ "eval_loss": 0.795186460018158,
51
+ "eval_runtime": 30.3396,
52
+ "eval_samples_per_second": 4.779,
53
+ "eval_steps_per_second": 4.779,
54
+ "step": 255
55
+ },
56
+ {
57
+ "epoch": 3.02,
58
+ "learning_rate": 4.904276985743381e-05,
59
+ "loss": 0.4713,
60
+ "step": 320
61
+ },
62
+ {
63
+ "epoch": 3.03,
64
+ "eval_accuracy": 0.7931034482758621,
65
+ "eval_loss": 0.6536410450935364,
66
+ "eval_runtime": 28.0955,
67
+ "eval_samples_per_second": 5.161,
68
+ "eval_steps_per_second": 5.161,
69
+ "step": 341
70
+ },
71
+ {
72
+ "epoch": 4.02,
73
+ "learning_rate": 4.7413441955193486e-05,
74
+ "loss": 0.3973,
75
+ "step": 400
76
+ },
77
+ {
78
+ "epoch": 4.03,
79
+ "eval_accuracy": 0.8689655172413793,
80
+ "eval_loss": 0.3637762665748596,
81
+ "eval_runtime": 28.0719,
82
+ "eval_samples_per_second": 5.165,
83
+ "eval_steps_per_second": 5.165,
84
+ "step": 426
85
+ },
86
+ {
87
+ "epoch": 5.02,
88
+ "learning_rate": 4.578411405295316e-05,
89
+ "loss": 0.3633,
90
+ "step": 480
91
+ },
92
+ {
93
+ "epoch": 5.03,
94
+ "eval_accuracy": 0.896551724137931,
95
+ "eval_loss": 0.36160850524902344,
96
+ "eval_runtime": 28.2429,
97
+ "eval_samples_per_second": 5.134,
98
+ "eval_steps_per_second": 5.134,
99
+ "step": 511
100
+ },
101
+ {
102
+ "epoch": 6.02,
103
+ "learning_rate": 4.415478615071284e-05,
104
+ "loss": 0.2336,
105
+ "step": 560
106
+ },
107
+ {
108
+ "epoch": 6.03,
109
+ "eval_accuracy": 0.896551724137931,
110
+ "eval_loss": 0.4579198360443115,
111
+ "eval_runtime": 30.0911,
112
+ "eval_samples_per_second": 4.819,
113
+ "eval_steps_per_second": 4.819,
114
+ "step": 596
115
+ },
116
+ {
117
+ "epoch": 7.02,
118
+ "learning_rate": 4.2525458248472504e-05,
119
+ "loss": 0.1997,
120
+ "step": 640
121
+ },
122
+ {
123
+ "epoch": 7.03,
124
+ "eval_accuracy": 0.6068965517241379,
125
+ "eval_loss": 1.597042202949524,
126
+ "eval_runtime": 27.9534,
127
+ "eval_samples_per_second": 5.187,
128
+ "eval_steps_per_second": 5.187,
129
+ "step": 682
130
+ },
131
+ {
132
+ "epoch": 8.01,
133
+ "learning_rate": 4.089613034623218e-05,
134
+ "loss": 0.2738,
135
+ "step": 720
136
+ },
137
+ {
138
+ "epoch": 8.03,
139
+ "eval_accuracy": 0.8689655172413793,
140
+ "eval_loss": 0.4101611375808716,
141
+ "eval_runtime": 27.8973,
142
+ "eval_samples_per_second": 5.198,
143
+ "eval_steps_per_second": 5.198,
144
+ "step": 767
145
+ },
146
+ {
147
+ "epoch": 9.01,
148
+ "learning_rate": 3.9266802443991856e-05,
149
+ "loss": 0.2492,
150
+ "step": 800
151
+ },
152
+ {
153
+ "epoch": 9.03,
154
+ "eval_accuracy": 0.8344827586206897,
155
+ "eval_loss": 0.7651154398918152,
156
+ "eval_runtime": 29.2625,
157
+ "eval_samples_per_second": 4.955,
158
+ "eval_steps_per_second": 4.955,
159
+ "step": 852
160
+ },
161
+ {
162
+ "epoch": 10.01,
163
+ "learning_rate": 3.763747454175153e-05,
164
+ "loss": 0.1568,
165
+ "step": 880
166
+ },
167
+ {
168
+ "epoch": 10.03,
169
+ "eval_accuracy": 0.8137931034482758,
170
+ "eval_loss": 0.8560731410980225,
171
+ "eval_runtime": 30.5796,
172
+ "eval_samples_per_second": 4.742,
173
+ "eval_steps_per_second": 4.742,
174
+ "step": 937
175
+ },
176
+ {
177
+ "epoch": 11.01,
178
+ "learning_rate": 3.60081466395112e-05,
179
+ "loss": 0.1856,
180
+ "step": 960
181
+ },
182
+ {
183
+ "epoch": 11.03,
184
+ "eval_accuracy": 0.9241379310344827,
185
+ "eval_loss": 0.2810556888580322,
186
+ "eval_runtime": 27.9183,
187
+ "eval_samples_per_second": 5.194,
188
+ "eval_steps_per_second": 5.194,
189
+ "step": 1023
190
+ },
191
+ {
192
+ "epoch": 12.01,
193
+ "learning_rate": 3.437881873727088e-05,
194
+ "loss": 0.1296,
195
+ "step": 1040
196
+ },
197
+ {
198
+ "epoch": 12.03,
199
+ "eval_accuracy": 0.9172413793103448,
200
+ "eval_loss": 0.3444097340106964,
201
+ "eval_runtime": 29.2588,
202
+ "eval_samples_per_second": 4.956,
203
+ "eval_steps_per_second": 4.956,
204
+ "step": 1108
205
+ },
206
+ {
207
+ "epoch": 13.0,
208
+ "learning_rate": 3.274949083503055e-05,
209
+ "loss": 0.0782,
210
+ "step": 1120
211
+ },
212
+ {
213
+ "epoch": 13.03,
214
+ "eval_accuracy": 0.9241379310344827,
215
+ "eval_loss": 0.34232720732688904,
216
+ "eval_runtime": 29.8187,
217
+ "eval_samples_per_second": 4.863,
218
+ "eval_steps_per_second": 4.863,
219
+ "step": 1193
220
+ },
221
+ {
222
+ "epoch": 14.0,
223
+ "learning_rate": 3.1120162932790225e-05,
224
+ "loss": 0.14,
225
+ "step": 1200
226
+ },
227
+ {
228
+ "epoch": 14.03,
229
+ "eval_accuracy": 0.9241379310344827,
230
+ "eval_loss": 0.31215447187423706,
231
+ "eval_runtime": 30.9503,
232
+ "eval_samples_per_second": 4.685,
233
+ "eval_steps_per_second": 4.685,
234
+ "step": 1278
235
+ },
236
+ {
237
+ "epoch": 15.0,
238
+ "learning_rate": 2.94908350305499e-05,
239
+ "loss": 0.0802,
240
+ "step": 1280
241
+ },
242
+ {
243
+ "epoch": 15.03,
244
+ "learning_rate": 2.786150712830957e-05,
245
+ "loss": 0.0689,
246
+ "step": 1360
247
+ },
248
+ {
249
+ "epoch": 15.03,
250
+ "eval_accuracy": 0.9172413793103448,
251
+ "eval_loss": 0.35335081815719604,
252
+ "eval_runtime": 30.933,
253
+ "eval_samples_per_second": 4.688,
254
+ "eval_steps_per_second": 4.688,
255
+ "step": 1364
256
+ },
257
+ {
258
+ "epoch": 16.03,
259
+ "learning_rate": 2.6232179226069247e-05,
260
+ "loss": 0.036,
261
+ "step": 1440
262
+ },
263
+ {
264
+ "epoch": 16.03,
265
+ "eval_accuracy": 0.9103448275862069,
266
+ "eval_loss": 0.4814639985561371,
267
+ "eval_runtime": 30.7939,
268
+ "eval_samples_per_second": 4.709,
269
+ "eval_steps_per_second": 4.709,
270
+ "step": 1449
271
+ },
272
+ {
273
+ "epoch": 17.03,
274
+ "learning_rate": 2.4602851323828923e-05,
275
+ "loss": 0.0695,
276
+ "step": 1520
277
+ },
278
+ {
279
+ "epoch": 17.03,
280
+ "eval_accuracy": 0.8827586206896552,
281
+ "eval_loss": 0.5698482394218445,
282
+ "eval_runtime": 30.7434,
283
+ "eval_samples_per_second": 4.716,
284
+ "eval_steps_per_second": 4.716,
285
+ "step": 1534
286
+ },
287
+ {
288
+ "epoch": 18.02,
289
+ "learning_rate": 2.29735234215886e-05,
290
+ "loss": 0.0618,
291
+ "step": 1600
292
+ },
293
+ {
294
+ "epoch": 18.03,
295
+ "eval_accuracy": 0.9310344827586207,
296
+ "eval_loss": 0.3052562177181244,
297
+ "eval_runtime": 29.2874,
298
+ "eval_samples_per_second": 4.951,
299
+ "eval_steps_per_second": 4.951,
300
+ "step": 1619
301
+ },
302
+ {
303
+ "epoch": 19.02,
304
+ "learning_rate": 2.134419551934827e-05,
305
+ "loss": 0.0553,
306
+ "step": 1680
307
+ },
308
+ {
309
+ "epoch": 19.03,
310
+ "eval_accuracy": 0.9241379310344827,
311
+ "eval_loss": 0.3443007469177246,
312
+ "eval_runtime": 30.1383,
313
+ "eval_samples_per_second": 4.811,
314
+ "eval_steps_per_second": 4.811,
315
+ "step": 1705
316
+ },
317
+ {
318
+ "epoch": 20.02,
319
+ "learning_rate": 1.9714867617107944e-05,
320
+ "loss": 0.0301,
321
+ "step": 1760
322
+ },
323
+ {
324
+ "epoch": 20.03,
325
+ "eval_accuracy": 0.9586206896551724,
326
+ "eval_loss": 0.1426996886730194,
327
+ "eval_runtime": 29.78,
328
+ "eval_samples_per_second": 4.869,
329
+ "eval_steps_per_second": 4.869,
330
+ "step": 1790
331
+ },
332
+ {
333
+ "epoch": 21.02,
334
+ "learning_rate": 1.808553971486762e-05,
335
+ "loss": 0.0412,
336
+ "step": 1840
337
+ },
338
+ {
339
+ "epoch": 21.03,
340
+ "eval_accuracy": 0.8689655172413793,
341
+ "eval_loss": 0.5618650317192078,
342
+ "eval_runtime": 29.9124,
343
+ "eval_samples_per_second": 4.847,
344
+ "eval_steps_per_second": 4.847,
345
+ "step": 1875
346
+ },
347
+ {
348
+ "epoch": 22.02,
349
+ "learning_rate": 1.6456211812627292e-05,
350
+ "loss": 0.0492,
351
+ "step": 1920
352
+ },
353
+ {
354
+ "epoch": 22.03,
355
+ "eval_accuracy": 0.8896551724137931,
356
+ "eval_loss": 0.5700664520263672,
357
+ "eval_runtime": 27.7486,
358
+ "eval_samples_per_second": 5.225,
359
+ "eval_steps_per_second": 5.225,
360
+ "step": 1960
361
+ },
362
+ {
363
+ "epoch": 23.01,
364
+ "learning_rate": 1.4826883910386965e-05,
365
+ "loss": 0.0171,
366
+ "step": 2000
367
+ },
368
+ {
369
+ "epoch": 23.03,
370
+ "eval_accuracy": 0.8689655172413793,
371
+ "eval_loss": 0.6377372145652771,
372
+ "eval_runtime": 29.1792,
373
+ "eval_samples_per_second": 4.969,
374
+ "eval_steps_per_second": 4.969,
375
+ "step": 2046
376
+ },
377
+ {
378
+ "epoch": 24.01,
379
+ "learning_rate": 1.3197556008146641e-05,
380
+ "loss": 0.0181,
381
+ "step": 2080
382
+ },
383
+ {
384
+ "epoch": 24.03,
385
+ "eval_accuracy": 0.8827586206896552,
386
+ "eval_loss": 0.5981259346008301,
387
+ "eval_runtime": 30.3638,
388
+ "eval_samples_per_second": 4.775,
389
+ "eval_steps_per_second": 4.775,
390
+ "step": 2131
391
+ },
392
+ {
393
+ "epoch": 25.01,
394
+ "learning_rate": 1.1568228105906315e-05,
395
+ "loss": 0.0305,
396
+ "step": 2160
397
+ },
398
+ {
399
+ "epoch": 25.03,
400
+ "eval_accuracy": 0.9448275862068966,
401
+ "eval_loss": 0.3177809715270996,
402
+ "eval_runtime": 30.2408,
403
+ "eval_samples_per_second": 4.795,
404
+ "eval_steps_per_second": 4.795,
405
+ "step": 2216
406
+ },
407
+ {
408
+ "epoch": 26.01,
409
+ "learning_rate": 9.938900203665988e-06,
410
+ "loss": 0.0393,
411
+ "step": 2240
412
+ },
413
+ {
414
+ "epoch": 26.03,
415
+ "eval_accuracy": 0.9103448275862069,
416
+ "eval_loss": 0.5434169173240662,
417
+ "eval_runtime": 29.452,
418
+ "eval_samples_per_second": 4.923,
419
+ "eval_steps_per_second": 4.923,
420
+ "step": 2301
421
+ },
422
+ {
423
+ "epoch": 27.01,
424
+ "learning_rate": 8.309572301425662e-06,
425
+ "loss": 0.0248,
426
+ "step": 2320
427
+ },
428
+ {
429
+ "epoch": 27.03,
430
+ "eval_accuracy": 0.9241379310344827,
431
+ "eval_loss": 0.40973013639450073,
432
+ "eval_runtime": 30.9246,
433
+ "eval_samples_per_second": 4.689,
434
+ "eval_steps_per_second": 4.689,
435
+ "step": 2387
436
+ },
437
+ {
438
+ "epoch": 28.0,
439
+ "learning_rate": 6.6802443991853366e-06,
440
+ "loss": 0.0146,
441
+ "step": 2400
442
+ },
443
+ {
444
+ "epoch": 28.03,
445
+ "eval_accuracy": 0.9103448275862069,
446
+ "eval_loss": 0.44273480772972107,
447
+ "eval_runtime": 29.0928,
448
+ "eval_samples_per_second": 4.984,
449
+ "eval_steps_per_second": 4.984,
450
+ "step": 2472
451
+ },
452
+ {
453
+ "epoch": 29.0,
454
+ "learning_rate": 5.05091649694501e-06,
455
+ "loss": 0.012,
456
+ "step": 2480
457
+ },
458
+ {
459
+ "epoch": 29.03,
460
+ "eval_accuracy": 0.903448275862069,
461
+ "eval_loss": 0.5618546605110168,
462
+ "eval_runtime": 29.5423,
463
+ "eval_samples_per_second": 4.908,
464
+ "eval_steps_per_second": 4.908,
465
+ "step": 2557
466
+ },
467
+ {
468
+ "epoch": 30.0,
469
+ "learning_rate": 3.4215885947046847e-06,
470
+ "loss": 0.0036,
471
+ "step": 2560
472
+ },
473
+ {
474
+ "epoch": 30.03,
475
+ "learning_rate": 1.7922606924643586e-06,
476
+ "loss": 0.0065,
477
+ "step": 2640
478
+ },
479
+ {
480
+ "epoch": 30.03,
481
+ "eval_accuracy": 0.9103448275862069,
482
+ "eval_loss": 0.5383774042129517,
483
+ "eval_runtime": 30.5686,
484
+ "eval_samples_per_second": 4.743,
485
+ "eval_steps_per_second": 4.743,
486
+ "step": 2642
487
+ },
488
+ {
489
+ "epoch": 31.03,
490
+ "learning_rate": 1.629327902240326e-07,
491
+ "loss": 0.009,
492
+ "step": 2720
493
+ },
494
+ {
495
+ "epoch": 31.03,
496
+ "eval_accuracy": 0.9172413793103448,
497
+ "eval_loss": 0.5013585686683655,
498
+ "eval_runtime": 30.4753,
499
+ "eval_samples_per_second": 4.758,
500
+ "eval_steps_per_second": 4.758,
501
+ "step": 2728
502
+ },
503
+ {
504
+ "epoch": 31.03,
505
+ "step": 2728,
506
+ "total_flos": 2.719508368433401e+19,
507
+ "train_loss": 0.20195167288498247,
508
+ "train_runtime": 11757.4892,
509
+ "train_samples_per_second": 1.856,
510
+ "train_steps_per_second": 0.232
511
+ },
512
+ {
513
+ "epoch": 31.03,
514
+ "eval_accuracy": 0.9470198675496688,
515
+ "eval_loss": 0.2553568482398987,
516
+ "eval_runtime": 163.4914,
517
+ "eval_samples_per_second": 0.924,
518
+ "eval_steps_per_second": 0.924,
519
+ "step": 2728
520
+ },
521
+ {
522
+ "epoch": 31.03,
523
+ "eval_accuracy": 0.9470198675496688,
524
+ "eval_loss": 0.2553568482398987,
525
+ "eval_runtime": 31.773,
526
+ "eval_samples_per_second": 4.752,
527
+ "eval_steps_per_second": 4.752,
528
+ "step": 2728
529
+ }
530
+ ],
531
+ "logging_steps": 80,
532
+ "max_steps": 2728,
533
+ "num_train_epochs": 9223372036854775807,
534
+ "save_steps": 500,
535
+ "total_flos": 2.719508368433401e+19,
536
+ "trial_name": null,
537
+ "trial_params": null
538
+ }