qmeeus commited on
Commit
ac75440
1 Parent(s): 66d730a

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.99,
3
+ "eval_accuracy": 0.9998191681735985,
4
+ "eval_loss": 0.01826309971511364,
5
+ "eval_runtime": 213.4605,
6
+ "eval_samples_per_second": 25.906,
7
+ "eval_steps_per_second": 0.81,
8
+ "train_loss": 0.042007273906525575,
9
+ "train_runtime": 10719.7329,
10
+ "train_samples_per_second": 19.016,
11
+ "train_steps_per_second": 0.148
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.99,
3
+ "eval_accuracy": 0.9998191681735985,
4
+ "eval_loss": 0.01826309971511364,
5
+ "eval_runtime": 213.4605,
6
+ "eval_samples_per_second": 25.906,
7
+ "eval_steps_per_second": 0.81
8
+ }
runs/Jul12_17-43-17_fasso.esat.kuleuven.be/events.out.tfevents.1689187642.fasso.esat.kuleuven.be.1126111.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aed77e07435fb038ca3382d327dd8d35c660eabbffc6282a3a836a481de8420
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.99,
3
+ "train_loss": 0.042007273906525575,
4
+ "train_runtime": 10719.7329,
5
+ "train_samples_per_second": 19.016,
6
+ "train_steps_per_second": 0.148
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,1024 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9998191681735985,
3
+ "best_model_checkpoint": "outputs/whisper-small-keyword-spotting/checkpoint-637",
4
+ "epoch": 4.988235294117647,
5
+ "global_step": 1590,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.03,
12
+ "learning_rate": 6.289308176100629e-05,
13
+ "loss": 0.6892,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.06,
18
+ "learning_rate": 0.00012578616352201257,
19
+ "loss": 0.6605,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.09,
24
+ "learning_rate": 0.00018867924528301889,
25
+ "loss": 0.6284,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.13,
30
+ "learning_rate": 0.00025157232704402514,
31
+ "loss": 0.577,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.16,
36
+ "learning_rate": 0.00031446540880503143,
37
+ "loss": 0.5128,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.19,
42
+ "learning_rate": 0.00037735849056603777,
43
+ "loss": 0.4114,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.22,
48
+ "learning_rate": 0.00044025157232704406,
49
+ "loss": 0.3091,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.25,
54
+ "learning_rate": 0.0005031446540880503,
55
+ "loss": 0.2469,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.28,
60
+ "learning_rate": 0.0005660377358490566,
61
+ "loss": 0.1943,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.31,
66
+ "learning_rate": 0.0006289308176100629,
67
+ "loss": 0.1421,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.35,
72
+ "learning_rate": 0.0006918238993710692,
73
+ "loss": 0.1142,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.38,
78
+ "learning_rate": 0.0007547169811320755,
79
+ "loss": 0.0857,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.41,
84
+ "learning_rate": 0.0008176100628930818,
85
+ "loss": 0.0679,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 0.44,
90
+ "learning_rate": 0.0008805031446540881,
91
+ "loss": 0.0663,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.47,
96
+ "learning_rate": 0.0009433962264150943,
97
+ "loss": 0.0443,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 0.5,
102
+ "learning_rate": 0.0009993011879804333,
103
+ "loss": 0.047,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 0.53,
108
+ "learning_rate": 0.000992313067784766,
109
+ "loss": 0.0362,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 0.56,
114
+ "learning_rate": 0.0009853249475890984,
115
+ "loss": 0.0382,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 0.6,
120
+ "learning_rate": 0.0009783368273934311,
121
+ "loss": 0.0522,
122
+ "step": 190
123
+ },
124
+ {
125
+ "epoch": 0.63,
126
+ "learning_rate": 0.0009713487071977638,
127
+ "loss": 0.022,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 0.66,
132
+ "learning_rate": 0.0009643605870020965,
133
+ "loss": 0.0408,
134
+ "step": 210
135
+ },
136
+ {
137
+ "epoch": 0.69,
138
+ "learning_rate": 0.0009573724668064291,
139
+ "loss": 0.0392,
140
+ "step": 220
141
+ },
142
+ {
143
+ "epoch": 0.72,
144
+ "learning_rate": 0.0009503843466107617,
145
+ "loss": 0.0289,
146
+ "step": 230
147
+ },
148
+ {
149
+ "epoch": 0.75,
150
+ "learning_rate": 0.0009433962264150943,
151
+ "loss": 0.0249,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 0.78,
156
+ "learning_rate": 0.0009364081062194269,
157
+ "loss": 0.0434,
158
+ "step": 250
159
+ },
160
+ {
161
+ "epoch": 0.82,
162
+ "learning_rate": 0.0009294199860237596,
163
+ "loss": 0.036,
164
+ "step": 260
165
+ },
166
+ {
167
+ "epoch": 0.85,
168
+ "learning_rate": 0.0009224318658280922,
169
+ "loss": 0.0372,
170
+ "step": 270
171
+ },
172
+ {
173
+ "epoch": 0.88,
174
+ "learning_rate": 0.0009154437456324249,
175
+ "loss": 0.0438,
176
+ "step": 280
177
+ },
178
+ {
179
+ "epoch": 0.91,
180
+ "learning_rate": 0.0009084556254367576,
181
+ "loss": 0.0273,
182
+ "step": 290
183
+ },
184
+ {
185
+ "epoch": 0.94,
186
+ "learning_rate": 0.0009014675052410902,
187
+ "loss": 0.022,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 0.97,
192
+ "learning_rate": 0.0008944793850454228,
193
+ "loss": 0.0268,
194
+ "step": 310
195
+ },
196
+ {
197
+ "epoch": 1.0,
198
+ "eval_accuracy": 0.9685352622061483,
199
+ "eval_loss": 0.0720464363694191,
200
+ "eval_runtime": 218.3559,
201
+ "eval_samples_per_second": 25.326,
202
+ "eval_steps_per_second": 0.792,
203
+ "step": 318
204
+ },
205
+ {
206
+ "epoch": 1.0,
207
+ "learning_rate": 0.0008874912648497554,
208
+ "loss": 0.0216,
209
+ "step": 320
210
+ },
211
+ {
212
+ "epoch": 1.04,
213
+ "learning_rate": 0.0008805031446540881,
214
+ "loss": 0.0128,
215
+ "step": 330
216
+ },
217
+ {
218
+ "epoch": 1.07,
219
+ "learning_rate": 0.0008735150244584207,
220
+ "loss": 0.0152,
221
+ "step": 340
222
+ },
223
+ {
224
+ "epoch": 1.1,
225
+ "learning_rate": 0.0008665269042627534,
226
+ "loss": 0.0225,
227
+ "step": 350
228
+ },
229
+ {
230
+ "epoch": 1.13,
231
+ "learning_rate": 0.000859538784067086,
232
+ "loss": 0.022,
233
+ "step": 360
234
+ },
235
+ {
236
+ "epoch": 1.16,
237
+ "learning_rate": 0.0008525506638714185,
238
+ "loss": 0.019,
239
+ "step": 370
240
+ },
241
+ {
242
+ "epoch": 1.19,
243
+ "learning_rate": 0.0008455625436757512,
244
+ "loss": 0.0228,
245
+ "step": 380
246
+ },
247
+ {
248
+ "epoch": 1.22,
249
+ "learning_rate": 0.0008385744234800838,
250
+ "loss": 0.0206,
251
+ "step": 390
252
+ },
253
+ {
254
+ "epoch": 1.25,
255
+ "learning_rate": 0.0008315863032844165,
256
+ "loss": 0.0163,
257
+ "step": 400
258
+ },
259
+ {
260
+ "epoch": 1.29,
261
+ "learning_rate": 0.0008245981830887491,
262
+ "loss": 0.0193,
263
+ "step": 410
264
+ },
265
+ {
266
+ "epoch": 1.32,
267
+ "learning_rate": 0.0008176100628930818,
268
+ "loss": 0.0239,
269
+ "step": 420
270
+ },
271
+ {
272
+ "epoch": 1.35,
273
+ "learning_rate": 0.0008106219426974144,
274
+ "loss": 0.0268,
275
+ "step": 430
276
+ },
277
+ {
278
+ "epoch": 1.38,
279
+ "learning_rate": 0.000803633822501747,
280
+ "loss": 0.0201,
281
+ "step": 440
282
+ },
283
+ {
284
+ "epoch": 1.41,
285
+ "learning_rate": 0.0007966457023060797,
286
+ "loss": 0.0159,
287
+ "step": 450
288
+ },
289
+ {
290
+ "epoch": 1.44,
291
+ "learning_rate": 0.0007896575821104123,
292
+ "loss": 0.0171,
293
+ "step": 460
294
+ },
295
+ {
296
+ "epoch": 1.47,
297
+ "learning_rate": 0.000782669461914745,
298
+ "loss": 0.0201,
299
+ "step": 470
300
+ },
301
+ {
302
+ "epoch": 1.51,
303
+ "learning_rate": 0.0007756813417190776,
304
+ "loss": 0.016,
305
+ "step": 480
306
+ },
307
+ {
308
+ "epoch": 1.54,
309
+ "learning_rate": 0.0007686932215234103,
310
+ "loss": 0.0155,
311
+ "step": 490
312
+ },
313
+ {
314
+ "epoch": 1.57,
315
+ "learning_rate": 0.0007617051013277429,
316
+ "loss": 0.0144,
317
+ "step": 500
318
+ },
319
+ {
320
+ "epoch": 1.6,
321
+ "learning_rate": 0.0007547169811320755,
322
+ "loss": 0.0065,
323
+ "step": 510
324
+ },
325
+ {
326
+ "epoch": 1.63,
327
+ "learning_rate": 0.0007477288609364081,
328
+ "loss": 0.0117,
329
+ "step": 520
330
+ },
331
+ {
332
+ "epoch": 1.66,
333
+ "learning_rate": 0.0007407407407407407,
334
+ "loss": 0.0174,
335
+ "step": 530
336
+ },
337
+ {
338
+ "epoch": 1.69,
339
+ "learning_rate": 0.0007337526205450734,
340
+ "loss": 0.0117,
341
+ "step": 540
342
+ },
343
+ {
344
+ "epoch": 1.73,
345
+ "learning_rate": 0.000726764500349406,
346
+ "loss": 0.0165,
347
+ "step": 550
348
+ },
349
+ {
350
+ "epoch": 1.76,
351
+ "learning_rate": 0.0007197763801537387,
352
+ "loss": 0.0152,
353
+ "step": 560
354
+ },
355
+ {
356
+ "epoch": 1.79,
357
+ "learning_rate": 0.0007127882599580712,
358
+ "loss": 0.0111,
359
+ "step": 570
360
+ },
361
+ {
362
+ "epoch": 1.82,
363
+ "learning_rate": 0.0007058001397624039,
364
+ "loss": 0.0119,
365
+ "step": 580
366
+ },
367
+ {
368
+ "epoch": 1.85,
369
+ "learning_rate": 0.0006988120195667366,
370
+ "loss": 0.0134,
371
+ "step": 590
372
+ },
373
+ {
374
+ "epoch": 1.88,
375
+ "learning_rate": 0.0006918238993710692,
376
+ "loss": 0.0166,
377
+ "step": 600
378
+ },
379
+ {
380
+ "epoch": 1.91,
381
+ "learning_rate": 0.0006848357791754019,
382
+ "loss": 0.0271,
383
+ "step": 610
384
+ },
385
+ {
386
+ "epoch": 1.95,
387
+ "learning_rate": 0.0006778476589797345,
388
+ "loss": 0.0186,
389
+ "step": 620
390
+ },
391
+ {
392
+ "epoch": 1.98,
393
+ "learning_rate": 0.0006708595387840672,
394
+ "loss": 0.0195,
395
+ "step": 630
396
+ },
397
+ {
398
+ "epoch": 2.0,
399
+ "eval_accuracy": 0.9998191681735985,
400
+ "eval_loss": 0.01826309971511364,
401
+ "eval_runtime": 324.2355,
402
+ "eval_samples_per_second": 17.056,
403
+ "eval_steps_per_second": 0.534,
404
+ "step": 637
405
+ },
406
+ {
407
+ "epoch": 2.01,
408
+ "learning_rate": 0.0006638714185883997,
409
+ "loss": 0.0159,
410
+ "step": 640
411
+ },
412
+ {
413
+ "epoch": 2.04,
414
+ "learning_rate": 0.0006568832983927324,
415
+ "loss": 0.0125,
416
+ "step": 650
417
+ },
418
+ {
419
+ "epoch": 2.07,
420
+ "learning_rate": 0.000649895178197065,
421
+ "loss": 0.0205,
422
+ "step": 660
423
+ },
424
+ {
425
+ "epoch": 2.1,
426
+ "learning_rate": 0.0006429070580013976,
427
+ "loss": 0.0187,
428
+ "step": 670
429
+ },
430
+ {
431
+ "epoch": 2.13,
432
+ "learning_rate": 0.0006359189378057303,
433
+ "loss": 0.0138,
434
+ "step": 680
435
+ },
436
+ {
437
+ "epoch": 2.16,
438
+ "learning_rate": 0.0006289308176100629,
439
+ "loss": 0.0104,
440
+ "step": 690
441
+ },
442
+ {
443
+ "epoch": 2.2,
444
+ "learning_rate": 0.0006219426974143956,
445
+ "loss": 0.0114,
446
+ "step": 700
447
+ },
448
+ {
449
+ "epoch": 2.23,
450
+ "learning_rate": 0.0006149545772187281,
451
+ "loss": 0.0128,
452
+ "step": 710
453
+ },
454
+ {
455
+ "epoch": 2.26,
456
+ "learning_rate": 0.0006079664570230608,
457
+ "loss": 0.0073,
458
+ "step": 720
459
+ },
460
+ {
461
+ "epoch": 2.29,
462
+ "learning_rate": 0.0006009783368273934,
463
+ "loss": 0.0056,
464
+ "step": 730
465
+ },
466
+ {
467
+ "epoch": 2.32,
468
+ "learning_rate": 0.000593990216631726,
469
+ "loss": 0.0041,
470
+ "step": 740
471
+ },
472
+ {
473
+ "epoch": 2.35,
474
+ "learning_rate": 0.0005870020964360588,
475
+ "loss": 0.0106,
476
+ "step": 750
477
+ },
478
+ {
479
+ "epoch": 2.38,
480
+ "learning_rate": 0.0005800139762403914,
481
+ "loss": 0.0126,
482
+ "step": 760
483
+ },
484
+ {
485
+ "epoch": 2.42,
486
+ "learning_rate": 0.000573025856044724,
487
+ "loss": 0.0123,
488
+ "step": 770
489
+ },
490
+ {
491
+ "epoch": 2.45,
492
+ "learning_rate": 0.0005660377358490566,
493
+ "loss": 0.0117,
494
+ "step": 780
495
+ },
496
+ {
497
+ "epoch": 2.48,
498
+ "learning_rate": 0.0005590496156533893,
499
+ "loss": 0.0162,
500
+ "step": 790
501
+ },
502
+ {
503
+ "epoch": 2.51,
504
+ "learning_rate": 0.0005520614954577219,
505
+ "loss": 0.0107,
506
+ "step": 800
507
+ },
508
+ {
509
+ "epoch": 2.54,
510
+ "learning_rate": 0.0005450733752620545,
511
+ "loss": 0.0156,
512
+ "step": 810
513
+ },
514
+ {
515
+ "epoch": 2.57,
516
+ "learning_rate": 0.0005380852550663872,
517
+ "loss": 0.0108,
518
+ "step": 820
519
+ },
520
+ {
521
+ "epoch": 2.6,
522
+ "learning_rate": 0.0005310971348707198,
523
+ "loss": 0.0096,
524
+ "step": 830
525
+ },
526
+ {
527
+ "epoch": 2.64,
528
+ "learning_rate": 0.0005241090146750524,
529
+ "loss": 0.0139,
530
+ "step": 840
531
+ },
532
+ {
533
+ "epoch": 2.67,
534
+ "learning_rate": 0.000517120894479385,
535
+ "loss": 0.0089,
536
+ "step": 850
537
+ },
538
+ {
539
+ "epoch": 2.7,
540
+ "learning_rate": 0.0005101327742837177,
541
+ "loss": 0.0097,
542
+ "step": 860
543
+ },
544
+ {
545
+ "epoch": 2.73,
546
+ "learning_rate": 0.0005031446540880503,
547
+ "loss": 0.0096,
548
+ "step": 870
549
+ },
550
+ {
551
+ "epoch": 2.76,
552
+ "learning_rate": 0.000496156533892383,
553
+ "loss": 0.0063,
554
+ "step": 880
555
+ },
556
+ {
557
+ "epoch": 2.79,
558
+ "learning_rate": 0.0004891684136967156,
559
+ "loss": 0.0092,
560
+ "step": 890
561
+ },
562
+ {
563
+ "epoch": 2.82,
564
+ "learning_rate": 0.00048218029350104825,
565
+ "loss": 0.0113,
566
+ "step": 900
567
+ },
568
+ {
569
+ "epoch": 2.85,
570
+ "learning_rate": 0.00047519217330538083,
571
+ "loss": 0.0079,
572
+ "step": 910
573
+ },
574
+ {
575
+ "epoch": 2.89,
576
+ "learning_rate": 0.00046820405310971346,
577
+ "loss": 0.0112,
578
+ "step": 920
579
+ },
580
+ {
581
+ "epoch": 2.92,
582
+ "learning_rate": 0.0004612159329140461,
583
+ "loss": 0.0045,
584
+ "step": 930
585
+ },
586
+ {
587
+ "epoch": 2.95,
588
+ "learning_rate": 0.0004542278127183788,
589
+ "loss": 0.0069,
590
+ "step": 940
591
+ },
592
+ {
593
+ "epoch": 2.98,
594
+ "learning_rate": 0.0004472396925227114,
595
+ "loss": 0.0111,
596
+ "step": 950
597
+ },
598
+ {
599
+ "epoch": 3.0,
600
+ "eval_accuracy": 0.9168173598553345,
601
+ "eval_loss": 0.20090103149414062,
602
+ "eval_runtime": 323.1789,
603
+ "eval_samples_per_second": 17.111,
604
+ "eval_steps_per_second": 0.535,
605
+ "step": 956
606
+ },
607
+ {
608
+ "epoch": 3.01,
609
+ "learning_rate": 0.00044025157232704406,
610
+ "loss": 0.0061,
611
+ "step": 960
612
+ },
613
+ {
614
+ "epoch": 3.04,
615
+ "learning_rate": 0.0004332634521313767,
616
+ "loss": 0.0069,
617
+ "step": 970
618
+ },
619
+ {
620
+ "epoch": 3.07,
621
+ "learning_rate": 0.0004262753319357093,
622
+ "loss": 0.017,
623
+ "step": 980
624
+ },
625
+ {
626
+ "epoch": 3.11,
627
+ "learning_rate": 0.0004192872117400419,
628
+ "loss": 0.0054,
629
+ "step": 990
630
+ },
631
+ {
632
+ "epoch": 3.14,
633
+ "learning_rate": 0.00041229909154437454,
634
+ "loss": 0.0112,
635
+ "step": 1000
636
+ },
637
+ {
638
+ "epoch": 3.17,
639
+ "learning_rate": 0.0004053109713487072,
640
+ "loss": 0.0132,
641
+ "step": 1010
642
+ },
643
+ {
644
+ "epoch": 3.2,
645
+ "learning_rate": 0.00039832285115303987,
646
+ "loss": 0.0105,
647
+ "step": 1020
648
+ },
649
+ {
650
+ "epoch": 3.23,
651
+ "learning_rate": 0.0003913347309573725,
652
+ "loss": 0.0121,
653
+ "step": 1030
654
+ },
655
+ {
656
+ "epoch": 3.26,
657
+ "learning_rate": 0.00038434661076170514,
658
+ "loss": 0.0077,
659
+ "step": 1040
660
+ },
661
+ {
662
+ "epoch": 3.29,
663
+ "learning_rate": 0.00037735849056603777,
664
+ "loss": 0.0039,
665
+ "step": 1050
666
+ },
667
+ {
668
+ "epoch": 3.33,
669
+ "learning_rate": 0.00037037037037037035,
670
+ "loss": 0.0074,
671
+ "step": 1060
672
+ },
673
+ {
674
+ "epoch": 3.36,
675
+ "learning_rate": 0.000363382250174703,
676
+ "loss": 0.0107,
677
+ "step": 1070
678
+ },
679
+ {
680
+ "epoch": 3.39,
681
+ "learning_rate": 0.0003563941299790356,
682
+ "loss": 0.0092,
683
+ "step": 1080
684
+ },
685
+ {
686
+ "epoch": 3.42,
687
+ "learning_rate": 0.0003494060097833683,
688
+ "loss": 0.0032,
689
+ "step": 1090
690
+ },
691
+ {
692
+ "epoch": 3.45,
693
+ "learning_rate": 0.00034241788958770095,
694
+ "loss": 0.0145,
695
+ "step": 1100
696
+ },
697
+ {
698
+ "epoch": 3.48,
699
+ "learning_rate": 0.0003354297693920336,
700
+ "loss": 0.0044,
701
+ "step": 1110
702
+ },
703
+ {
704
+ "epoch": 3.51,
705
+ "learning_rate": 0.0003284416491963662,
706
+ "loss": 0.0059,
707
+ "step": 1120
708
+ },
709
+ {
710
+ "epoch": 3.55,
711
+ "learning_rate": 0.0003214535290006988,
712
+ "loss": 0.0052,
713
+ "step": 1130
714
+ },
715
+ {
716
+ "epoch": 3.58,
717
+ "learning_rate": 0.00031446540880503143,
718
+ "loss": 0.0101,
719
+ "step": 1140
720
+ },
721
+ {
722
+ "epoch": 3.61,
723
+ "learning_rate": 0.00030747728860936407,
724
+ "loss": 0.0125,
725
+ "step": 1150
726
+ },
727
+ {
728
+ "epoch": 3.64,
729
+ "learning_rate": 0.0003004891684136967,
730
+ "loss": 0.0097,
731
+ "step": 1160
732
+ },
733
+ {
734
+ "epoch": 3.67,
735
+ "learning_rate": 0.0002935010482180294,
736
+ "loss": 0.0075,
737
+ "step": 1170
738
+ },
739
+ {
740
+ "epoch": 3.7,
741
+ "learning_rate": 0.000286512928022362,
742
+ "loss": 0.0057,
743
+ "step": 1180
744
+ },
745
+ {
746
+ "epoch": 3.73,
747
+ "learning_rate": 0.00027952480782669466,
748
+ "loss": 0.008,
749
+ "step": 1190
750
+ },
751
+ {
752
+ "epoch": 3.76,
753
+ "learning_rate": 0.00027253668763102724,
754
+ "loss": 0.0081,
755
+ "step": 1200
756
+ },
757
+ {
758
+ "epoch": 3.8,
759
+ "learning_rate": 0.0002655485674353599,
760
+ "loss": 0.0046,
761
+ "step": 1210
762
+ },
763
+ {
764
+ "epoch": 3.83,
765
+ "learning_rate": 0.0002585604472396925,
766
+ "loss": 0.0087,
767
+ "step": 1220
768
+ },
769
+ {
770
+ "epoch": 3.86,
771
+ "learning_rate": 0.00025157232704402514,
772
+ "loss": 0.0067,
773
+ "step": 1230
774
+ },
775
+ {
776
+ "epoch": 3.89,
777
+ "learning_rate": 0.0002445842068483578,
778
+ "loss": 0.0062,
779
+ "step": 1240
780
+ },
781
+ {
782
+ "epoch": 3.92,
783
+ "learning_rate": 0.00023759608665269041,
784
+ "loss": 0.0037,
785
+ "step": 1250
786
+ },
787
+ {
788
+ "epoch": 3.95,
789
+ "learning_rate": 0.00023060796645702305,
790
+ "loss": 0.006,
791
+ "step": 1260
792
+ },
793
+ {
794
+ "epoch": 3.98,
795
+ "learning_rate": 0.0002236198462613557,
796
+ "loss": 0.0065,
797
+ "step": 1270
798
+ },
799
+ {
800
+ "epoch": 4.0,
801
+ "eval_accuracy": 0.8544303797468354,
802
+ "eval_loss": 0.28469616174697876,
803
+ "eval_runtime": 254.8847,
804
+ "eval_samples_per_second": 21.696,
805
+ "eval_steps_per_second": 0.679,
806
+ "step": 1275
807
+ },
808
+ {
809
+ "epoch": 4.02,
810
+ "learning_rate": 0.00021663172606568835,
811
+ "loss": 0.0092,
812
+ "step": 1280
813
+ },
814
+ {
815
+ "epoch": 4.05,
816
+ "learning_rate": 0.00020964360587002095,
817
+ "loss": 0.0068,
818
+ "step": 1290
819
+ },
820
+ {
821
+ "epoch": 4.08,
822
+ "learning_rate": 0.0002026554856743536,
823
+ "loss": 0.0058,
824
+ "step": 1300
825
+ },
826
+ {
827
+ "epoch": 4.11,
828
+ "learning_rate": 0.00019566736547868625,
829
+ "loss": 0.0083,
830
+ "step": 1310
831
+ },
832
+ {
833
+ "epoch": 4.14,
834
+ "learning_rate": 0.00018867924528301889,
835
+ "loss": 0.0049,
836
+ "step": 1320
837
+ },
838
+ {
839
+ "epoch": 4.17,
840
+ "learning_rate": 0.0001816911250873515,
841
+ "loss": 0.0085,
842
+ "step": 1330
843
+ },
844
+ {
845
+ "epoch": 4.2,
846
+ "learning_rate": 0.00017470300489168416,
847
+ "loss": 0.0071,
848
+ "step": 1340
849
+ },
850
+ {
851
+ "epoch": 4.24,
852
+ "learning_rate": 0.0001677148846960168,
853
+ "loss": 0.0047,
854
+ "step": 1350
855
+ },
856
+ {
857
+ "epoch": 4.27,
858
+ "learning_rate": 0.0001607267645003494,
859
+ "loss": 0.0073,
860
+ "step": 1360
861
+ },
862
+ {
863
+ "epoch": 4.3,
864
+ "learning_rate": 0.00015373864430468203,
865
+ "loss": 0.0051,
866
+ "step": 1370
867
+ },
868
+ {
869
+ "epoch": 4.33,
870
+ "learning_rate": 0.0001467505241090147,
871
+ "loss": 0.0061,
872
+ "step": 1380
873
+ },
874
+ {
875
+ "epoch": 4.36,
876
+ "learning_rate": 0.00013976240391334733,
877
+ "loss": 0.0043,
878
+ "step": 1390
879
+ },
880
+ {
881
+ "epoch": 4.39,
882
+ "learning_rate": 0.00013277428371767994,
883
+ "loss": 0.0046,
884
+ "step": 1400
885
+ },
886
+ {
887
+ "epoch": 4.42,
888
+ "learning_rate": 0.00012578616352201257,
889
+ "loss": 0.0102,
890
+ "step": 1410
891
+ },
892
+ {
893
+ "epoch": 4.45,
894
+ "learning_rate": 0.00011879804332634521,
895
+ "loss": 0.0061,
896
+ "step": 1420
897
+ },
898
+ {
899
+ "epoch": 4.49,
900
+ "learning_rate": 0.00011180992313067786,
901
+ "loss": 0.0089,
902
+ "step": 1430
903
+ },
904
+ {
905
+ "epoch": 4.52,
906
+ "learning_rate": 0.00010482180293501048,
907
+ "loss": 0.0038,
908
+ "step": 1440
909
+ },
910
+ {
911
+ "epoch": 4.55,
912
+ "learning_rate": 9.783368273934313e-05,
913
+ "loss": 0.0033,
914
+ "step": 1450
915
+ },
916
+ {
917
+ "epoch": 4.58,
918
+ "learning_rate": 9.084556254367575e-05,
919
+ "loss": 0.0078,
920
+ "step": 1460
921
+ },
922
+ {
923
+ "epoch": 4.61,
924
+ "learning_rate": 8.38574423480084e-05,
925
+ "loss": 0.006,
926
+ "step": 1470
927
+ },
928
+ {
929
+ "epoch": 4.64,
930
+ "learning_rate": 7.686932215234102e-05,
931
+ "loss": 0.0077,
932
+ "step": 1480
933
+ },
934
+ {
935
+ "epoch": 4.67,
936
+ "learning_rate": 6.988120195667366e-05,
937
+ "loss": 0.005,
938
+ "step": 1490
939
+ },
940
+ {
941
+ "epoch": 4.71,
942
+ "learning_rate": 6.289308176100629e-05,
943
+ "loss": 0.005,
944
+ "step": 1500
945
+ },
946
+ {
947
+ "epoch": 4.74,
948
+ "learning_rate": 5.590496156533893e-05,
949
+ "loss": 0.0046,
950
+ "step": 1510
951
+ },
952
+ {
953
+ "epoch": 4.77,
954
+ "learning_rate": 4.891684136967156e-05,
955
+ "loss": 0.0044,
956
+ "step": 1520
957
+ },
958
+ {
959
+ "epoch": 4.8,
960
+ "learning_rate": 4.19287211740042e-05,
961
+ "loss": 0.0051,
962
+ "step": 1530
963
+ },
964
+ {
965
+ "epoch": 4.83,
966
+ "learning_rate": 3.494060097833683e-05,
967
+ "loss": 0.0078,
968
+ "step": 1540
969
+ },
970
+ {
971
+ "epoch": 4.86,
972
+ "learning_rate": 2.7952480782669464e-05,
973
+ "loss": 0.0021,
974
+ "step": 1550
975
+ },
976
+ {
977
+ "epoch": 4.89,
978
+ "learning_rate": 2.09643605870021e-05,
979
+ "loss": 0.0043,
980
+ "step": 1560
981
+ },
982
+ {
983
+ "epoch": 4.93,
984
+ "learning_rate": 1.3976240391334732e-05,
985
+ "loss": 0.0041,
986
+ "step": 1570
987
+ },
988
+ {
989
+ "epoch": 4.96,
990
+ "learning_rate": 6.988120195667366e-06,
991
+ "loss": 0.0049,
992
+ "step": 1580
993
+ },
994
+ {
995
+ "epoch": 4.99,
996
+ "learning_rate": 0.0,
997
+ "loss": 0.0086,
998
+ "step": 1590
999
+ },
1000
+ {
1001
+ "epoch": 4.99,
1002
+ "eval_accuracy": 0.9168173598553345,
1003
+ "eval_loss": 0.18948502838611603,
1004
+ "eval_runtime": 224.532,
1005
+ "eval_samples_per_second": 24.629,
1006
+ "eval_steps_per_second": 0.77,
1007
+ "step": 1590
1008
+ },
1009
+ {
1010
+ "epoch": 4.99,
1011
+ "step": 1590,
1012
+ "total_flos": 2.5540381823039996e+19,
1013
+ "train_loss": 0.042007273906525575,
1014
+ "train_runtime": 10719.7329,
1015
+ "train_samples_per_second": 19.016,
1016
+ "train_steps_per_second": 0.148
1017
+ }
1018
+ ],
1019
+ "max_steps": 1590,
1020
+ "num_train_epochs": 5,
1021
+ "total_flos": 2.5540381823039996e+19,
1022
+ "trial_name": null,
1023
+ "trial_params": null
1024
+ }