cloudwalkerw committed on
Commit
3432413
·
1 Parent(s): c9fd8cf

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. all_results.json +12 -0
  3. eval_results.json +8 -0
  4. train_results.json +7 -0
  5. trainer_state.json +613 -0
README.md CHANGED
@@ -1,6 +1,7 @@
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
 
4
  - generated_from_trainer
5
  metrics:
6
  - accuracy
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [microsoft/wavlm-base](https://huggingface.co/microsoft/wavlm-base) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.3325
20
  - Accuracy: 0.8974
21
 
22
  ## Model description
 
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
4
+ - audio-classification
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
 
17
 
18
  This model is a fine-tuned version of [microsoft/wavlm-base](https://huggingface.co/microsoft/wavlm-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3295
21
  - Accuracy: 0.8974
22
 
23
  ## Model description
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.3295176029205322,
5
+ "eval_runtime": 499.4668,
6
+ "eval_samples_per_second": 49.741,
7
+ "eval_steps_per_second": 24.871,
8
+ "train_loss": 0.3321091743430706,
9
+ "train_runtime": 22869.426,
10
+ "train_samples_per_second": 11.098,
11
+ "train_steps_per_second": 0.173
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.3295176029205322,
5
+ "eval_runtime": 499.4668,
6
+ "eval_samples_per_second": 49.741,
7
+ "eval_steps_per_second": 24.871
8
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "train_loss": 0.3321091743430706,
4
+ "train_runtime": 22869.426,
5
+ "train_samples_per_second": 11.098,
6
+ "train_steps_per_second": 0.173
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8974400257607471,
3
+ "best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_3/checkpoint-100",
4
+ "epoch": 9.98109640831758,
5
+ "eval_steps": 100,
6
+ "global_step": 3960,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.25,
13
+ "learning_rate": 7.575757575757576e-05,
14
+ "loss": 0.4161,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.25,
19
+ "eval_accuracy": 0.8974400257607471,
20
+ "eval_loss": 0.3295176029205322,
21
+ "eval_runtime": 497.7495,
22
+ "eval_samples_per_second": 49.913,
23
+ "eval_steps_per_second": 24.956,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.5,
28
+ "learning_rate": 0.00015151515151515152,
29
+ "loss": 0.3196,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 0.5,
34
+ "eval_accuracy": 0.8974400257607471,
35
+ "eval_loss": 0.3312481641769409,
36
+ "eval_runtime": 499.0539,
37
+ "eval_samples_per_second": 49.782,
38
+ "eval_steps_per_second": 24.891,
39
+ "step": 200
40
+ },
41
+ {
42
+ "epoch": 0.76,
43
+ "learning_rate": 0.00022727272727272725,
44
+ "loss": 0.3391,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.76,
49
+ "eval_accuracy": 0.8974400257607471,
50
+ "eval_loss": 0.3353486657142639,
51
+ "eval_runtime": 499.0444,
52
+ "eval_samples_per_second": 49.783,
53
+ "eval_steps_per_second": 24.892,
54
+ "step": 300
55
+ },
56
+ {
57
+ "epoch": 1.01,
58
+ "learning_rate": 0.00029966329966329963,
59
+ "loss": 0.3285,
60
+ "step": 400
61
+ },
62
+ {
63
+ "epoch": 1.01,
64
+ "eval_accuracy": 0.8974400257607471,
65
+ "eval_loss": 0.3321685791015625,
66
+ "eval_runtime": 499.1271,
67
+ "eval_samples_per_second": 49.775,
68
+ "eval_steps_per_second": 24.887,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 1.26,
73
+ "learning_rate": 0.00029124579124579125,
74
+ "loss": 0.3354,
75
+ "step": 500
76
+ },
77
+ {
78
+ "epoch": 1.26,
79
+ "eval_accuracy": 0.8974400257607471,
80
+ "eval_loss": 0.3366284668445587,
81
+ "eval_runtime": 499.5555,
82
+ "eval_samples_per_second": 49.732,
83
+ "eval_steps_per_second": 24.866,
84
+ "step": 500
85
+ },
86
+ {
87
+ "epoch": 1.51,
88
+ "learning_rate": 0.0002828282828282828,
89
+ "loss": 0.3344,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 1.51,
94
+ "eval_accuracy": 0.8974400257607471,
95
+ "eval_loss": 0.3315127491950989,
96
+ "eval_runtime": 499.6088,
97
+ "eval_samples_per_second": 49.727,
98
+ "eval_steps_per_second": 24.863,
99
+ "step": 600
100
+ },
101
+ {
102
+ "epoch": 1.76,
103
+ "learning_rate": 0.0002744107744107744,
104
+ "loss": 0.3343,
105
+ "step": 700
106
+ },
107
+ {
108
+ "epoch": 1.76,
109
+ "eval_accuracy": 0.8974400257607471,
110
+ "eval_loss": 0.33083751797676086,
111
+ "eval_runtime": 499.431,
112
+ "eval_samples_per_second": 49.745,
113
+ "eval_steps_per_second": 24.872,
114
+ "step": 700
115
+ },
116
+ {
117
+ "epoch": 2.02,
118
+ "learning_rate": 0.00026599326599326595,
119
+ "loss": 0.325,
120
+ "step": 800
121
+ },
122
+ {
123
+ "epoch": 2.02,
124
+ "eval_accuracy": 0.8974400257607471,
125
+ "eval_loss": 0.33818838000297546,
126
+ "eval_runtime": 499.7312,
127
+ "eval_samples_per_second": 49.715,
128
+ "eval_steps_per_second": 24.857,
129
+ "step": 800
130
+ },
131
+ {
132
+ "epoch": 2.27,
133
+ "learning_rate": 0.00025757575757575756,
134
+ "loss": 0.34,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 2.27,
139
+ "eval_accuracy": 0.8974400257607471,
140
+ "eval_loss": 0.3314325511455536,
141
+ "eval_runtime": 499.9566,
142
+ "eval_samples_per_second": 49.692,
143
+ "eval_steps_per_second": 24.846,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 2.52,
148
+ "learning_rate": 0.00024915824915824913,
149
+ "loss": 0.3333,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 2.52,
154
+ "eval_accuracy": 0.8974400257607471,
155
+ "eval_loss": 0.33876436948776245,
156
+ "eval_runtime": 499.9566,
157
+ "eval_samples_per_second": 49.692,
158
+ "eval_steps_per_second": 24.846,
159
+ "step": 1000
160
+ },
161
+ {
162
+ "epoch": 2.77,
163
+ "learning_rate": 0.00024074074074074072,
164
+ "loss": 0.318,
165
+ "step": 1100
166
+ },
167
+ {
168
+ "epoch": 2.77,
169
+ "eval_accuracy": 0.8974400257607471,
170
+ "eval_loss": 0.33707037568092346,
171
+ "eval_runtime": 499.5137,
172
+ "eval_samples_per_second": 49.736,
173
+ "eval_steps_per_second": 24.868,
174
+ "step": 1100
175
+ },
176
+ {
177
+ "epoch": 3.02,
178
+ "learning_rate": 0.0002323232323232323,
179
+ "loss": 0.3281,
180
+ "step": 1200
181
+ },
182
+ {
183
+ "epoch": 3.02,
184
+ "eval_accuracy": 0.8974400257607471,
185
+ "eval_loss": 0.33621686697006226,
186
+ "eval_runtime": 499.3401,
187
+ "eval_samples_per_second": 49.754,
188
+ "eval_steps_per_second": 24.877,
189
+ "step": 1200
190
+ },
191
+ {
192
+ "epoch": 3.28,
193
+ "learning_rate": 0.0002239057239057239,
194
+ "loss": 0.3293,
195
+ "step": 1300
196
+ },
197
+ {
198
+ "epoch": 3.28,
199
+ "eval_accuracy": 0.8974400257607471,
200
+ "eval_loss": 0.33068734407424927,
201
+ "eval_runtime": 499.4564,
202
+ "eval_samples_per_second": 49.742,
203
+ "eval_steps_per_second": 24.871,
204
+ "step": 1300
205
+ },
206
+ {
207
+ "epoch": 3.53,
208
+ "learning_rate": 0.00021548821548821544,
209
+ "loss": 0.3175,
210
+ "step": 1400
211
+ },
212
+ {
213
+ "epoch": 3.53,
214
+ "eval_accuracy": 0.8974400257607471,
215
+ "eval_loss": 0.33569779992103577,
216
+ "eval_runtime": 499.6949,
217
+ "eval_samples_per_second": 49.718,
218
+ "eval_steps_per_second": 24.859,
219
+ "step": 1400
220
+ },
221
+ {
222
+ "epoch": 3.78,
223
+ "learning_rate": 0.00020707070707070703,
224
+ "loss": 0.3415,
225
+ "step": 1500
226
+ },
227
+ {
228
+ "epoch": 3.78,
229
+ "eval_accuracy": 0.8974400257607471,
230
+ "eval_loss": 0.3321123421192169,
231
+ "eval_runtime": 499.567,
232
+ "eval_samples_per_second": 49.731,
233
+ "eval_steps_per_second": 24.866,
234
+ "step": 1500
235
+ },
236
+ {
237
+ "epoch": 4.03,
238
+ "learning_rate": 0.00019865319865319862,
239
+ "loss": 0.341,
240
+ "step": 1600
241
+ },
242
+ {
243
+ "epoch": 4.03,
244
+ "eval_accuracy": 0.8974400257607471,
245
+ "eval_loss": 0.33071625232696533,
246
+ "eval_runtime": 499.8939,
247
+ "eval_samples_per_second": 49.699,
248
+ "eval_steps_per_second": 24.849,
249
+ "step": 1600
250
+ },
251
+ {
252
+ "epoch": 4.28,
253
+ "learning_rate": 0.00019023569023569022,
254
+ "loss": 0.3285,
255
+ "step": 1700
256
+ },
257
+ {
258
+ "epoch": 4.28,
259
+ "eval_accuracy": 0.8974400257607471,
260
+ "eval_loss": 0.3307797610759735,
261
+ "eval_runtime": 500.1708,
262
+ "eval_samples_per_second": 49.671,
263
+ "eval_steps_per_second": 24.836,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 4.54,
268
+ "learning_rate": 0.0001818181818181818,
269
+ "loss": 0.3337,
270
+ "step": 1800
271
+ },
272
+ {
273
+ "epoch": 4.54,
274
+ "eval_accuracy": 0.8974400257607471,
275
+ "eval_loss": 0.330828994512558,
276
+ "eval_runtime": 499.7725,
277
+ "eval_samples_per_second": 49.711,
278
+ "eval_steps_per_second": 24.855,
279
+ "step": 1800
280
+ },
281
+ {
282
+ "epoch": 4.79,
283
+ "learning_rate": 0.0001734006734006734,
284
+ "loss": 0.3276,
285
+ "step": 1900
286
+ },
287
+ {
288
+ "epoch": 4.79,
289
+ "eval_accuracy": 0.8974400257607471,
290
+ "eval_loss": 0.33070385456085205,
291
+ "eval_runtime": 499.7607,
292
+ "eval_samples_per_second": 49.712,
293
+ "eval_steps_per_second": 24.856,
294
+ "step": 1900
295
+ },
296
+ {
297
+ "epoch": 5.04,
298
+ "learning_rate": 0.000164983164983165,
299
+ "loss": 0.3248,
300
+ "step": 2000
301
+ },
302
+ {
303
+ "epoch": 5.04,
304
+ "eval_accuracy": 0.8974400257607471,
305
+ "eval_loss": 0.33109623193740845,
306
+ "eval_runtime": 500.0337,
307
+ "eval_samples_per_second": 49.685,
308
+ "eval_steps_per_second": 24.842,
309
+ "step": 2000
310
+ },
311
+ {
312
+ "epoch": 5.29,
313
+ "learning_rate": 0.00015656565656565653,
314
+ "loss": 0.3371,
315
+ "step": 2100
316
+ },
317
+ {
318
+ "epoch": 5.29,
319
+ "eval_accuracy": 0.8974400257607471,
320
+ "eval_loss": 0.3317299485206604,
321
+ "eval_runtime": 499.7678,
322
+ "eval_samples_per_second": 49.711,
323
+ "eval_steps_per_second": 24.856,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 5.55,
328
+ "learning_rate": 0.00014814814814814812,
329
+ "loss": 0.3261,
330
+ "step": 2200
331
+ },
332
+ {
333
+ "epoch": 5.55,
334
+ "eval_accuracy": 0.8974400257607471,
335
+ "eval_loss": 0.33148789405822754,
336
+ "eval_runtime": 499.916,
337
+ "eval_samples_per_second": 49.696,
338
+ "eval_steps_per_second": 24.848,
339
+ "step": 2200
340
+ },
341
+ {
342
+ "epoch": 5.8,
343
+ "learning_rate": 0.0001397306397306397,
344
+ "loss": 0.3277,
345
+ "step": 2300
346
+ },
347
+ {
348
+ "epoch": 5.8,
349
+ "eval_accuracy": 0.8974400257607471,
350
+ "eval_loss": 0.33226969838142395,
351
+ "eval_runtime": 500.0285,
352
+ "eval_samples_per_second": 49.685,
353
+ "eval_steps_per_second": 24.843,
354
+ "step": 2300
355
+ },
356
+ {
357
+ "epoch": 6.05,
358
+ "learning_rate": 0.0001313131313131313,
359
+ "loss": 0.3297,
360
+ "step": 2400
361
+ },
362
+ {
363
+ "epoch": 6.05,
364
+ "eval_accuracy": 0.8974400257607471,
365
+ "eval_loss": 0.33209508657455444,
366
+ "eval_runtime": 499.9688,
367
+ "eval_samples_per_second": 49.691,
368
+ "eval_steps_per_second": 24.846,
369
+ "step": 2400
370
+ },
371
+ {
372
+ "epoch": 6.3,
373
+ "learning_rate": 0.0001228956228956229,
374
+ "loss": 0.3397,
375
+ "step": 2500
376
+ },
377
+ {
378
+ "epoch": 6.3,
379
+ "eval_accuracy": 0.8974400257607471,
380
+ "eval_loss": 0.3315936028957367,
381
+ "eval_runtime": 500.0781,
382
+ "eval_samples_per_second": 49.68,
383
+ "eval_steps_per_second": 24.84,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 6.55,
388
+ "learning_rate": 0.00011447811447811446,
389
+ "loss": 0.3313,
390
+ "step": 2600
391
+ },
392
+ {
393
+ "epoch": 6.55,
394
+ "eval_accuracy": 0.8974400257607471,
395
+ "eval_loss": 0.3375791013240814,
396
+ "eval_runtime": 500.3761,
397
+ "eval_samples_per_second": 49.651,
398
+ "eval_steps_per_second": 24.825,
399
+ "step": 2600
400
+ },
401
+ {
402
+ "epoch": 6.81,
403
+ "learning_rate": 0.00010606060606060605,
404
+ "loss": 0.3297,
405
+ "step": 2700
406
+ },
407
+ {
408
+ "epoch": 6.81,
409
+ "eval_accuracy": 0.8974400257607471,
410
+ "eval_loss": 0.3325986862182617,
411
+ "eval_runtime": 499.9451,
412
+ "eval_samples_per_second": 49.693,
413
+ "eval_steps_per_second": 24.847,
414
+ "step": 2700
415
+ },
416
+ {
417
+ "epoch": 7.06,
418
+ "learning_rate": 9.764309764309764e-05,
419
+ "loss": 0.3148,
420
+ "step": 2800
421
+ },
422
+ {
423
+ "epoch": 7.06,
424
+ "eval_accuracy": 0.8974400257607471,
425
+ "eval_loss": 0.3326033651828766,
426
+ "eval_runtime": 500.102,
427
+ "eval_samples_per_second": 49.678,
428
+ "eval_steps_per_second": 24.839,
429
+ "step": 2800
430
+ },
431
+ {
432
+ "epoch": 7.31,
433
+ "learning_rate": 8.92255892255892e-05,
434
+ "loss": 0.33,
435
+ "step": 2900
436
+ },
437
+ {
438
+ "epoch": 7.31,
439
+ "eval_accuracy": 0.8974400257607471,
440
+ "eval_loss": 0.33067989349365234,
441
+ "eval_runtime": 499.6928,
442
+ "eval_samples_per_second": 49.719,
443
+ "eval_steps_per_second": 24.859,
444
+ "step": 2900
445
+ },
446
+ {
447
+ "epoch": 7.56,
448
+ "learning_rate": 8.08080808080808e-05,
449
+ "loss": 0.3373,
450
+ "step": 3000
451
+ },
452
+ {
453
+ "epoch": 7.56,
454
+ "eval_accuracy": 0.8974400257607471,
455
+ "eval_loss": 0.3357817232608795,
456
+ "eval_runtime": 499.6901,
457
+ "eval_samples_per_second": 49.719,
458
+ "eval_steps_per_second": 24.859,
459
+ "step": 3000
460
+ },
461
+ {
462
+ "epoch": 7.81,
463
+ "learning_rate": 7.239057239057239e-05,
464
+ "loss": 0.3229,
465
+ "step": 3100
466
+ },
467
+ {
468
+ "epoch": 7.81,
469
+ "eval_accuracy": 0.8974400257607471,
470
+ "eval_loss": 0.331524521112442,
471
+ "eval_runtime": 499.59,
472
+ "eval_samples_per_second": 49.729,
473
+ "eval_steps_per_second": 24.864,
474
+ "step": 3100
475
+ },
476
+ {
477
+ "epoch": 8.07,
478
+ "learning_rate": 6.397306397306397e-05,
479
+ "loss": 0.3311,
480
+ "step": 3200
481
+ },
482
+ {
483
+ "epoch": 8.07,
484
+ "eval_accuracy": 0.8974400257607471,
485
+ "eval_loss": 0.3329682946205139,
486
+ "eval_runtime": 499.5794,
487
+ "eval_samples_per_second": 49.73,
488
+ "eval_steps_per_second": 24.865,
489
+ "step": 3200
490
+ },
491
+ {
492
+ "epoch": 8.32,
493
+ "learning_rate": 5.5555555555555545e-05,
494
+ "loss": 0.32,
495
+ "step": 3300
496
+ },
497
+ {
498
+ "epoch": 8.32,
499
+ "eval_accuracy": 0.8974400257607471,
500
+ "eval_loss": 0.33294886350631714,
501
+ "eval_runtime": 500.087,
502
+ "eval_samples_per_second": 49.679,
503
+ "eval_steps_per_second": 24.84,
504
+ "step": 3300
505
+ },
506
+ {
507
+ "epoch": 8.57,
508
+ "learning_rate": 4.7138047138047136e-05,
509
+ "loss": 0.3303,
510
+ "step": 3400
511
+ },
512
+ {
513
+ "epoch": 8.57,
514
+ "eval_accuracy": 0.8974400257607471,
515
+ "eval_loss": 0.33330321311950684,
516
+ "eval_runtime": 499.642,
517
+ "eval_samples_per_second": 49.724,
518
+ "eval_steps_per_second": 24.862,
519
+ "step": 3400
520
+ },
521
+ {
522
+ "epoch": 8.82,
523
+ "learning_rate": 3.8720538720538714e-05,
524
+ "loss": 0.3268,
525
+ "step": 3500
526
+ },
527
+ {
528
+ "epoch": 8.82,
529
+ "eval_accuracy": 0.8974400257607471,
530
+ "eval_loss": 0.332504540681839,
531
+ "eval_runtime": 499.7153,
532
+ "eval_samples_per_second": 49.716,
533
+ "eval_steps_per_second": 24.858,
534
+ "step": 3500
535
+ },
536
+ {
537
+ "epoch": 9.07,
538
+ "learning_rate": 3.03030303030303e-05,
539
+ "loss": 0.3362,
540
+ "step": 3600
541
+ },
542
+ {
543
+ "epoch": 9.07,
544
+ "eval_accuracy": 0.8974400257607471,
545
+ "eval_loss": 0.33143192529678345,
546
+ "eval_runtime": 499.7396,
547
+ "eval_samples_per_second": 49.714,
548
+ "eval_steps_per_second": 24.857,
549
+ "step": 3600
550
+ },
551
+ {
552
+ "epoch": 9.33,
553
+ "learning_rate": 2.1885521885521884e-05,
554
+ "loss": 0.3391,
555
+ "step": 3700
556
+ },
557
+ {
558
+ "epoch": 9.33,
559
+ "eval_accuracy": 0.8974400257607471,
560
+ "eval_loss": 0.33087798953056335,
561
+ "eval_runtime": 499.6009,
562
+ "eval_samples_per_second": 49.728,
563
+ "eval_steps_per_second": 24.864,
564
+ "step": 3700
565
+ },
566
+ {
567
+ "epoch": 9.58,
568
+ "learning_rate": 1.3468013468013465e-05,
569
+ "loss": 0.3233,
570
+ "step": 3800
571
+ },
572
+ {
573
+ "epoch": 9.58,
574
+ "eval_accuracy": 0.8974400257607471,
575
+ "eval_loss": 0.33190712332725525,
576
+ "eval_runtime": 499.6968,
577
+ "eval_samples_per_second": 49.718,
578
+ "eval_steps_per_second": 24.859,
579
+ "step": 3800
580
+ },
581
+ {
582
+ "epoch": 9.83,
583
+ "learning_rate": 5.05050505050505e-06,
584
+ "loss": 0.3196,
585
+ "step": 3900
586
+ },
587
+ {
588
+ "epoch": 9.83,
589
+ "eval_accuracy": 0.8974400257607471,
590
+ "eval_loss": 0.33247998356819153,
591
+ "eval_runtime": 499.604,
592
+ "eval_samples_per_second": 49.727,
593
+ "eval_steps_per_second": 24.864,
594
+ "step": 3900
595
+ },
596
+ {
597
+ "epoch": 9.98,
598
+ "step": 3960,
599
+ "total_flos": 1.506420070651024e+19,
600
+ "train_loss": 0.3321091743430706,
601
+ "train_runtime": 22869.426,
602
+ "train_samples_per_second": 11.098,
603
+ "train_steps_per_second": 0.173
604
+ }
605
+ ],
606
+ "logging_steps": 100,
607
+ "max_steps": 3960,
608
+ "num_train_epochs": 10,
609
+ "save_steps": 100,
610
+ "total_flos": 1.506420070651024e+19,
611
+ "trial_name": null,
612
+ "trial_params": null
613
+ }