nrshoudi committed on
Commit
12799c5
1 Parent(s): 56d56d5

End of training

Browse files
Files changed (3) hide show
  1. README.md +23 -23
  2. pytorch_model.bin +1 -1
  3. trainer_state.json +143 -143
README.md CHANGED
@@ -17,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.3264
21
- - Wer: 0.0417
22
- - Per: 0.0321
23
 
24
  ## Model description
25
 
@@ -51,26 +51,26 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Wer | Per |
53
  |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|
54
- | 8.2138 | 1.0 | 1617 | 3.1149 | 1.0 | 1.0 |
55
- | 1.1507 | 2.0 | 3234 | 0.3763 | 0.0733 | 0.0570 |
56
- | 0.2 | 3.0 | 4851 | 0.3199 | 0.0634 | 0.0493 |
57
- | 0.1328 | 4.0 | 6468 | 0.3447 | 0.0530 | 0.0399 |
58
- | 0.1045 | 5.0 | 8085 | 0.3005 | 0.0514 | 0.0401 |
59
- | 0.088 | 6.0 | 9702 | 0.3276 | 0.0517 | 0.0391 |
60
- | 0.0756 | 7.0 | 11319 | 0.3367 | 0.0512 | 0.0388 |
61
- | 0.062 | 8.0 | 12936 | 0.3933 | 0.0572 | 0.0422 |
62
- | 0.0603 | 9.0 | 14553 | 0.3331 | 0.0475 | 0.0359 |
63
- | 0.0517 | 10.0 | 16170 | 0.3014 | 0.0500 | 0.0372 |
64
- | 0.046 | 11.0 | 17787 | 0.3067 | 0.0477 | 0.0360 |
65
- | 0.0398 | 12.0 | 19404 | 0.2990 | 0.0456 | 0.0348 |
66
- | 0.0329 | 13.0 | 21021 | 0.3064 | 0.0450 | 0.0342 |
67
- | 0.0319 | 14.0 | 22638 | 0.3336 | 0.0499 | 0.0383 |
68
- | 0.0288 | 15.0 | 24255 | 0.3222 | 0.0451 | 0.0358 |
69
- | 0.0258 | 16.0 | 25872 | 0.3302 | 0.0428 | 0.0324 |
70
- | 0.0181 | 17.0 | 27489 | 0.3408 | 0.0433 | 0.0334 |
71
- | 0.0172 | 18.0 | 29106 | 0.3216 | 0.0430 | 0.0331 |
72
- | 0.0145 | 19.0 | 30723 | 0.3254 | 0.0430 | 0.0329 |
73
- | 0.0127 | 20.0 | 32340 | 0.3264 | 0.0417 | 0.0321 |
74
 
75
 
76
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3192
21
+ - Wer: 0.0429
22
+ - Per: 0.0326
23
 
24
  ## Model description
25
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Wer | Per |
53
  |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|
54
+ | 8.6256 | 1.0 | 1617 | 3.2158 | 1.0 | 1.0 |
55
+ | 1.3467 | 2.0 | 3234 | 0.3775 | 0.0753 | 0.0605 |
56
+ | 0.1996 | 3.0 | 4851 | 0.3054 | 0.0561 | 0.0421 |
57
+ | 0.1311 | 4.0 | 6468 | 0.3120 | 0.0507 | 0.0387 |
58
+ | 0.1044 | 5.0 | 8085 | 0.3169 | 0.0507 | 0.0386 |
59
+ | 0.0891 | 6.0 | 9702 | 0.3030 | 0.0496 | 0.0372 |
60
+ | 0.0788 | 7.0 | 11319 | 0.3174 | 0.0520 | 0.0391 |
61
+ | 0.0619 | 8.0 | 12936 | 0.3312 | 0.0546 | 0.0417 |
62
+ | 0.0552 | 9.0 | 14553 | 0.3353 | 0.0480 | 0.0358 |
63
+ | 0.0493 | 10.0 | 16170 | 0.2742 | 0.0452 | 0.0336 |
64
+ | 0.0417 | 11.0 | 17787 | 0.2894 | 0.0459 | 0.0355 |
65
+ | 0.0382 | 12.0 | 19404 | 0.2949 | 0.0463 | 0.0350 |
66
+ | 0.0306 | 13.0 | 21021 | 0.3041 | 0.0472 | 0.0348 |
67
+ | 0.03 | 14.0 | 22638 | 0.3109 | 0.0453 | 0.0339 |
68
+ | 0.0244 | 15.0 | 24255 | 0.3234 | 0.0448 | 0.0335 |
69
+ | 0.0215 | 16.0 | 25872 | 0.3321 | 0.0436 | 0.0332 |
70
+ | 0.0182 | 17.0 | 27489 | 0.3367 | 0.0420 | 0.0315 |
71
+ | 0.0171 | 18.0 | 29106 | 0.3317 | 0.0430 | 0.0328 |
72
+ | 0.0155 | 19.0 | 30723 | 0.3249 | 0.0423 | 0.0321 |
73
+ | 0.0132 | 20.0 | 32340 | 0.3192 | 0.0429 | 0.0326 |
74
 
75
 
76
  ### Framework versions
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29fe62faf88173276e1072187f5f576f7c13533622f9219344bfbaa4171491a9
3
  size 1262066346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d40387ddacca81291f2327e4d9f5fded1e572861e24264c59d83ba8f1a49dec0
3
  size 1262066346
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.29899266362190247,
3
- "best_model_checkpoint": "nrshoudi/wav2vec_arabic_mdd/checkpoint-19404",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 32340,
@@ -11,331 +11,331 @@
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 5e-05,
14
- "loss": 8.2138,
15
  "step": 1617
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_loss": 3.1148788928985596,
20
  "eval_per": 1.0,
21
- "eval_runtime": 115.2576,
22
- "eval_samples_per_second": 7.054,
23
- "eval_steps_per_second": 3.531,
24
  "eval_wer": 1.0,
25
  "step": 1617
26
  },
27
  {
28
  "epoch": 2.0,
29
  "learning_rate": 0.0001,
30
- "loss": 1.1507,
31
  "step": 3234
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 0.37626245617866516,
36
- "eval_per": 0.0569820032036182,
37
- "eval_runtime": 116.2275,
38
- "eval_samples_per_second": 6.995,
39
- "eval_steps_per_second": 3.502,
40
- "eval_wer": 0.0732583640520741,
41
  "step": 3234
42
  },
43
  {
44
  "epoch": 3.0,
45
  "learning_rate": 9.444444444444444e-05,
46
- "loss": 0.2,
47
  "step": 4851
48
  },
49
  {
50
  "epoch": 3.0,
51
- "eval_loss": 0.319857656955719,
52
- "eval_per": 0.04925562988787336,
53
- "eval_runtime": 115.4027,
54
- "eval_samples_per_second": 7.045,
55
- "eval_steps_per_second": 3.527,
56
- "eval_wer": 0.06341281321450236,
57
  "step": 4851
58
  },
59
  {
60
  "epoch": 4.0,
61
  "learning_rate": 8.888888888888889e-05,
62
- "loss": 0.1328,
63
  "step": 6468
64
  },
65
  {
66
  "epoch": 4.0,
67
- "eval_loss": 0.34467777609825134,
68
- "eval_per": 0.03985677942146424,
69
- "eval_runtime": 115.3838,
70
- "eval_samples_per_second": 7.046,
71
- "eval_steps_per_second": 3.527,
72
- "eval_wer": 0.05296066445802809,
73
  "step": 6468
74
  },
75
  {
76
  "epoch": 5.0,
77
  "learning_rate": 8.333333333333334e-05,
78
- "loss": 0.1045,
79
  "step": 8085
80
  },
81
  {
82
  "epoch": 5.0,
83
- "eval_loss": 0.30046939849853516,
84
- "eval_per": 0.04013945161594271,
85
- "eval_runtime": 116.1796,
86
- "eval_samples_per_second": 6.998,
87
- "eval_steps_per_second": 3.503,
88
- "eval_wer": 0.05142083897158322,
89
  "step": 8085
90
  },
91
  {
92
  "epoch": 6.0,
93
  "learning_rate": 7.777777777777778e-05,
94
- "loss": 0.088,
95
  "step": 9702
96
  },
97
  {
98
  "epoch": 6.0,
99
- "eval_loss": 0.32756975293159485,
100
- "eval_per": 0.03907943088664845,
101
- "eval_runtime": 116.0861,
102
- "eval_samples_per_second": 7.003,
103
- "eval_steps_per_second": 3.506,
104
- "eval_wer": 0.05165414586346881,
105
  "step": 9702
106
  },
107
  {
108
  "epoch": 7.0,
109
  "learning_rate": 7.222222222222222e-05,
110
- "loss": 0.0756,
111
  "step": 11319
112
  },
113
  {
114
  "epoch": 7.0,
115
- "eval_loss": 0.33674493432044983,
116
- "eval_per": 0.03879675869216998,
117
- "eval_runtime": 115.7009,
118
- "eval_samples_per_second": 7.027,
119
- "eval_steps_per_second": 3.518,
120
- "eval_wer": 0.05118753207969763,
121
  "step": 11319
122
  },
123
  {
124
  "epoch": 8.0,
125
  "learning_rate": 6.666666666666667e-05,
126
- "loss": 0.062,
127
  "step": 12936
128
  },
129
  {
130
  "epoch": 8.0,
131
- "eval_loss": 0.3933280110359192,
132
- "eval_per": 0.042165269009705075,
133
- "eval_runtime": 115.0602,
134
- "eval_samples_per_second": 7.066,
135
- "eval_steps_per_second": 3.537,
136
- "eval_wer": 0.05720684989034576,
137
  "step": 12936
138
  },
139
  {
140
  "epoch": 9.0,
141
  "learning_rate": 6.111111111111112e-05,
142
- "loss": 0.0603,
143
  "step": 14553
144
  },
145
  {
146
  "epoch": 9.0,
147
- "eval_loss": 0.3331439197063446,
148
- "eval_per": 0.03587581268255913,
149
- "eval_runtime": 115.3081,
150
- "eval_samples_per_second": 7.051,
151
- "eval_steps_per_second": 3.53,
152
- "eval_wer": 0.04754794456628249,
153
  "step": 14553
154
  },
155
  {
156
  "epoch": 10.0,
157
  "learning_rate": 5.555555555555556e-05,
158
- "loss": 0.0517,
159
  "step": 16170
160
  },
161
  {
162
  "epoch": 10.0,
163
- "eval_loss": 0.301434725522995,
164
- "eval_per": 0.03717139357391878,
165
- "eval_runtime": 115.3305,
166
- "eval_samples_per_second": 7.049,
167
- "eval_steps_per_second": 3.529,
168
- "eval_wer": 0.049974336241892583,
169
  "step": 16170
170
  },
171
  {
172
  "epoch": 11.0,
173
  "learning_rate": 5e-05,
174
- "loss": 0.046,
175
  "step": 17787
176
  },
177
  {
178
  "epoch": 11.0,
179
- "eval_loss": 0.30668768286705017,
180
- "eval_per": 0.036017148779798364,
181
- "eval_runtime": 115.2426,
182
- "eval_samples_per_second": 7.055,
183
- "eval_steps_per_second": 3.532,
184
- "eval_wer": 0.04768792870141384,
185
  "step": 17787
186
  },
187
  {
188
  "epoch": 12.0,
189
  "learning_rate": 4.4444444444444447e-05,
190
- "loss": 0.0398,
191
  "step": 19404
192
  },
193
  {
194
  "epoch": 12.0,
195
- "eval_loss": 0.29899266362190247,
196
- "eval_per": 0.03481579195326486,
197
- "eval_runtime": 115.6042,
198
- "eval_samples_per_second": 7.033,
199
- "eval_steps_per_second": 3.521,
200
- "eval_wer": 0.04558816667444356,
201
  "step": 19404
202
  },
203
  {
204
  "epoch": 13.0,
205
  "learning_rate": 3.888888888888889e-05,
206
- "loss": 0.0329,
207
  "step": 21021
208
  },
209
  {
210
  "epoch": 13.0,
211
- "eval_loss": 0.3064207434654236,
212
- "eval_per": 0.034226891548101386,
213
- "eval_runtime": 116.0675,
214
- "eval_samples_per_second": 7.005,
215
- "eval_steps_per_second": 3.507,
216
- "eval_wer": 0.04498156875554104,
217
  "step": 21021
218
  },
219
  {
220
  "epoch": 14.0,
221
  "learning_rate": 3.3333333333333335e-05,
222
- "loss": 0.0319,
223
  "step": 22638
224
  },
225
  {
226
  "epoch": 14.0,
227
- "eval_loss": 0.3336479663848877,
228
- "eval_per": 0.03825497031941958,
229
- "eval_runtime": 115.1932,
230
- "eval_samples_per_second": 7.058,
231
- "eval_steps_per_second": 3.533,
232
- "eval_wer": 0.04988101348513835,
233
  "step": 22638
234
  },
235
  {
236
  "epoch": 15.0,
237
  "learning_rate": 2.777777777777778e-05,
238
- "loss": 0.0288,
239
  "step": 24255
240
  },
241
  {
242
  "epoch": 15.0,
243
- "eval_loss": 0.32215777039527893,
244
- "eval_per": 0.03575803260152643,
245
- "eval_runtime": 115.0522,
246
- "eval_samples_per_second": 7.066,
247
- "eval_steps_per_second": 3.538,
248
- "eval_wer": 0.04512155289067239,
249
  "step": 24255
250
  },
251
  {
252
  "epoch": 16.0,
253
  "learning_rate": 2.2222222222222223e-05,
254
- "loss": 0.0258,
255
  "step": 25872
256
  },
257
  {
258
  "epoch": 16.0,
259
- "eval_loss": 0.33019357919692993,
260
- "eval_per": 0.03241307830019787,
261
- "eval_runtime": 115.355,
262
- "eval_samples_per_second": 7.048,
263
- "eval_steps_per_second": 3.528,
264
- "eval_wer": 0.042788483971816525,
265
  "step": 25872
266
  },
267
  {
268
  "epoch": 17.0,
269
  "learning_rate": 1.6666666666666667e-05,
270
- "loss": 0.0181,
271
  "step": 27489
272
  },
273
  {
274
  "epoch": 17.0,
275
- "eval_loss": 0.3408127427101135,
276
- "eval_per": 0.03335531894845944,
277
- "eval_runtime": 115.3097,
278
- "eval_samples_per_second": 7.051,
279
- "eval_steps_per_second": 3.53,
280
- "eval_wer": 0.0432550977555877,
281
  "step": 27489
282
  },
283
  {
284
  "epoch": 18.0,
285
  "learning_rate": 1.1111111111111112e-05,
286
- "loss": 0.0172,
287
  "step": 29106
288
  },
289
  {
290
  "epoch": 18.0,
291
- "eval_loss": 0.3215967118740082,
292
- "eval_per": 0.033119758786394046,
293
- "eval_runtime": 115.4137,
294
- "eval_samples_per_second": 7.044,
295
- "eval_steps_per_second": 3.526,
296
  "eval_wer": 0.043021790863702114,
297
  "step": 29106
298
  },
299
  {
300
  "epoch": 19.0,
301
  "learning_rate": 5.555555555555556e-06,
302
- "loss": 0.0145,
303
  "step": 30723
304
  },
305
  {
306
  "epoch": 19.0,
307
- "eval_loss": 0.3254193663597107,
308
- "eval_per": 0.03293131065674173,
309
- "eval_runtime": 115.2743,
310
- "eval_samples_per_second": 7.053,
311
- "eval_steps_per_second": 3.531,
312
- "eval_wer": 0.042975129485324996,
313
  "step": 30723
314
  },
315
  {
316
  "epoch": 20.0,
317
  "learning_rate": 0.0,
318
- "loss": 0.0127,
319
  "step": 32340
320
  },
321
  {
322
  "epoch": 20.0,
323
- "eval_loss": 0.32637345790863037,
324
- "eval_per": 0.0321304061057194,
325
- "eval_runtime": 115.2538,
326
- "eval_samples_per_second": 7.054,
327
- "eval_steps_per_second": 3.531,
328
- "eval_wer": 0.04166861089076571,
329
  "step": 32340
330
  },
331
  {
332
  "epoch": 20.0,
333
  "step": 32340,
334
  "total_flos": 1.0355998175044045e+19,
335
- "train_loss": 0.520349072083029,
336
- "train_runtime": 19017.9936,
337
- "train_samples_per_second": 3.401,
338
- "train_steps_per_second": 1.7
339
  }
340
  ],
341
  "logging_steps": 500,
 
1
  {
2
+ "best_metric": 0.2741861045360565,
3
+ "best_model_checkpoint": "nrshoudi/wav2vec_arabic_mdd/checkpoint-16170",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 32340,
 
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 5e-05,
14
+ "loss": 8.6256,
15
  "step": 1617
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_loss": 3.215786933898926,
20
  "eval_per": 1.0,
21
+ "eval_runtime": 116.0176,
22
+ "eval_samples_per_second": 7.008,
23
+ "eval_steps_per_second": 3.508,
24
  "eval_wer": 1.0,
25
  "step": 1617
26
  },
27
  {
28
  "epoch": 2.0,
29
  "learning_rate": 0.0001,
30
+ "loss": 1.3467,
31
  "step": 3234
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 0.37750244140625,
36
+ "eval_per": 0.060468293602185995,
37
+ "eval_runtime": 116.9742,
38
+ "eval_samples_per_second": 6.95,
39
+ "eval_steps_per_second": 3.479,
40
+ "eval_wer": 0.07531146470066725,
41
  "step": 3234
42
  },
43
  {
44
  "epoch": 3.0,
45
  "learning_rate": 9.444444444444444e-05,
46
+ "loss": 0.1996,
47
  "step": 4851
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "eval_loss": 0.30538317561149597,
52
+ "eval_per": 0.042094600961085464,
53
+ "eval_runtime": 117.0156,
54
+ "eval_samples_per_second": 6.948,
55
+ "eval_steps_per_second": 3.478,
56
+ "eval_wer": 0.056133638187672065,
57
  "step": 4851
58
  },
59
  {
60
  "epoch": 4.0,
61
  "learning_rate": 8.888888888888889e-05,
62
+ "loss": 0.1311,
63
  "step": 6468
64
  },
65
  {
66
  "epoch": 4.0,
67
+ "eval_loss": 0.31199753284454346,
68
+ "eval_per": 0.038655422594930745,
69
+ "eval_runtime": 116.6027,
70
+ "eval_samples_per_second": 6.972,
71
+ "eval_steps_per_second": 3.49,
72
+ "eval_wer": 0.05067425691754934,
73
  "step": 6468
74
  },
75
  {
76
  "epoch": 5.0,
77
  "learning_rate": 8.333333333333334e-05,
78
+ "loss": 0.1044,
79
  "step": 8085
80
  },
81
  {
82
  "epoch": 5.0,
83
+ "eval_loss": 0.31690752506256104,
84
+ "eval_per": 0.038608310562517664,
85
+ "eval_runtime": 118.0869,
86
+ "eval_samples_per_second": 6.885,
87
+ "eval_steps_per_second": 3.447,
88
+ "eval_wer": 0.05072091829592646,
89
  "step": 8085
90
  },
91
  {
92
  "epoch": 6.0,
93
  "learning_rate": 7.777777777777778e-05,
94
+ "loss": 0.0891,
95
  "step": 9702
96
  },
97
  {
98
  "epoch": 6.0,
99
+ "eval_loss": 0.30304601788520813,
100
+ "eval_per": 0.03721850560633186,
101
+ "eval_runtime": 118.602,
102
+ "eval_samples_per_second": 6.855,
103
+ "eval_steps_per_second": 3.432,
104
+ "eval_wer": 0.04960104521487565,
105
  "step": 9702
106
  },
107
  {
108
  "epoch": 7.0,
109
  "learning_rate": 7.222222222222222e-05,
110
+ "loss": 0.0788,
111
  "step": 11319
112
  },
113
  {
114
  "epoch": 7.0,
115
+ "eval_loss": 0.31736186146736145,
116
+ "eval_per": 0.03910298690285499,
117
+ "eval_runtime": 118.2233,
118
+ "eval_samples_per_second": 6.877,
119
+ "eval_steps_per_second": 3.443,
120
+ "eval_wer": 0.052027436890485744,
121
  "step": 11319
122
  },
123
  {
124
  "epoch": 8.0,
125
  "learning_rate": 6.666666666666667e-05,
126
+ "loss": 0.0619,
127
  "step": 12936
128
  },
129
  {
130
  "epoch": 8.0,
131
+ "eval_loss": 0.33120226860046387,
132
+ "eval_per": 0.0416941486855743,
133
+ "eval_runtime": 118.3706,
134
+ "eval_samples_per_second": 6.868,
135
+ "eval_steps_per_second": 3.438,
136
+ "eval_wer": 0.05459381270122719,
137
  "step": 12936
138
  },
139
  {
140
  "epoch": 9.0,
141
  "learning_rate": 6.111111111111112e-05,
142
+ "loss": 0.0552,
143
  "step": 14553
144
  },
145
  {
146
  "epoch": 9.0,
147
+ "eval_loss": 0.3352554142475128,
148
+ "eval_per": 0.03575803260152643,
149
+ "eval_runtime": 119.3222,
150
+ "eval_samples_per_second": 6.813,
151
+ "eval_steps_per_second": 3.411,
152
+ "eval_wer": 0.04796789697167654,
153
  "step": 14553
154
  },
155
  {
156
  "epoch": 10.0,
157
  "learning_rate": 5.555555555555556e-05,
158
+ "loss": 0.0493,
159
  "step": 16170
160
  },
161
  {
162
  "epoch": 10.0,
163
+ "eval_loss": 0.2741861045360565,
164
+ "eval_per": 0.033567323094318285,
165
+ "eval_runtime": 119.9093,
166
+ "eval_samples_per_second": 6.78,
167
+ "eval_steps_per_second": 3.394,
168
+ "eval_wer": 0.04521487564742663,
169
  "step": 16170
170
  },
171
  {
172
  "epoch": 11.0,
173
  "learning_rate": 5e-05,
174
+ "loss": 0.0417,
175
  "step": 17787
176
  },
177
  {
178
  "epoch": 11.0,
179
+ "eval_loss": 0.28943416476249695,
180
+ "eval_per": 0.03547536040704796,
181
+ "eval_runtime": 120.1333,
182
+ "eval_samples_per_second": 6.767,
183
+ "eval_steps_per_second": 3.388,
184
+ "eval_wer": 0.04586813494470627,
185
  "step": 17787
186
  },
187
  {
188
  "epoch": 12.0,
189
  "learning_rate": 4.4444444444444447e-05,
190
+ "loss": 0.0382,
191
  "step": 19404
192
  },
193
  {
194
  "epoch": 12.0,
195
+ "eval_loss": 0.29486820101737976,
196
+ "eval_per": 0.03500424008291718,
197
+ "eval_runtime": 119.6619,
198
+ "eval_samples_per_second": 6.794,
199
+ "eval_steps_per_second": 3.401,
200
+ "eval_wer": 0.04633474872847744,
201
  "step": 19404
202
  },
203
  {
204
  "epoch": 13.0,
205
  "learning_rate": 3.888888888888889e-05,
206
+ "loss": 0.0306,
207
  "step": 21021
208
  },
209
  {
210
  "epoch": 13.0,
211
+ "eval_loss": 0.30409368872642517,
212
+ "eval_per": 0.034792235937058325,
213
+ "eval_runtime": 120.2788,
214
+ "eval_samples_per_second": 6.759,
215
+ "eval_steps_per_second": 3.384,
216
+ "eval_wer": 0.04717465353926555,
217
  "step": 21021
218
  },
219
  {
220
  "epoch": 14.0,
221
  "learning_rate": 3.3333333333333335e-05,
222
+ "loss": 0.03,
223
  "step": 22638
224
  },
225
  {
226
  "epoch": 14.0,
227
+ "eval_loss": 0.3109176456928253,
228
+ "eval_per": 0.033920663337416376,
229
+ "eval_runtime": 118.5483,
230
+ "eval_samples_per_second": 6.858,
231
+ "eval_steps_per_second": 3.433,
232
+ "eval_wer": 0.045261537025803746,
233
  "step": 22638
234
  },
235
  {
236
  "epoch": 15.0,
237
  "learning_rate": 2.777777777777778e-05,
238
+ "loss": 0.0244,
239
  "step": 24255
240
  },
241
  {
242
  "epoch": 15.0,
243
+ "eval_loss": 0.3234339952468872,
244
+ "eval_per": 0.03347309902949213,
245
+ "eval_runtime": 118.5882,
246
+ "eval_samples_per_second": 6.856,
247
+ "eval_steps_per_second": 3.432,
248
+ "eval_wer": 0.044841584620409686,
249
  "step": 24255
250
  },
251
  {
252
  "epoch": 16.0,
253
  "learning_rate": 2.2222222222222223e-05,
254
+ "loss": 0.0215,
255
  "step": 25872
256
  },
257
  {
258
  "epoch": 16.0,
259
+ "eval_loss": 0.33212560415267944,
260
+ "eval_per": 0.03316687081880712,
261
+ "eval_runtime": 118.6022,
262
+ "eval_samples_per_second": 6.855,
263
+ "eval_steps_per_second": 3.432,
264
+ "eval_wer": 0.04362838878260464,
265
  "step": 25872
266
  },
267
  {
268
  "epoch": 17.0,
269
  "learning_rate": 1.6666666666666667e-05,
270
+ "loss": 0.0182,
271
  "step": 27489
272
  },
273
  {
274
  "epoch": 17.0,
275
+ "eval_loss": 0.3367431163787842,
276
+ "eval_per": 0.03154150570055592,
277
+ "eval_runtime": 118.0708,
278
+ "eval_samples_per_second": 6.886,
279
+ "eval_steps_per_second": 3.447,
280
+ "eval_wer": 0.04204190191778265,
281
  "step": 27489
282
  },
283
  {
284
  "epoch": 18.0,
285
  "learning_rate": 1.1111111111111112e-05,
286
+ "loss": 0.0171,
287
  "step": 29106
288
  },
289
  {
290
  "epoch": 18.0,
291
+ "eval_loss": 0.3317033350467682,
292
+ "eval_per": 0.032766418543295955,
293
+ "eval_runtime": 117.8323,
294
+ "eval_samples_per_second": 6.9,
295
+ "eval_steps_per_second": 3.454,
296
  "eval_wer": 0.043021790863702114,
297
  "step": 29106
298
  },
299
  {
300
  "epoch": 19.0,
301
  "learning_rate": 5.555555555555556e-06,
302
+ "loss": 0.0155,
303
  "step": 30723
304
  },
305
  {
306
  "epoch": 19.0,
307
+ "eval_loss": 0.3248533606529236,
308
+ "eval_per": 0.03210685008951286,
309
+ "eval_runtime": 118.4543,
310
+ "eval_samples_per_second": 6.863,
311
+ "eval_steps_per_second": 3.436,
312
+ "eval_wer": 0.04227520880966824,
313
  "step": 30723
314
  },
315
  {
316
  "epoch": 20.0,
317
  "learning_rate": 0.0,
318
+ "loss": 0.0132,
319
  "step": 32340
320
  },
321
  {
322
  "epoch": 20.0,
323
+ "eval_loss": 0.31921908259391785,
324
+ "eval_per": 0.032577970413643645,
325
+ "eval_runtime": 118.1674,
326
+ "eval_samples_per_second": 6.88,
327
+ "eval_steps_per_second": 3.444,
328
+ "eval_wer": 0.04292846810694788,
329
  "step": 32340
330
  },
331
  {
332
  "epoch": 20.0,
333
  "step": 32340,
334
  "total_flos": 1.0355998175044045e+19,
335
+ "train_loss": 0.54961431122298,
336
+ "train_runtime": 19355.5909,
337
+ "train_samples_per_second": 3.342,
338
+ "train_steps_per_second": 1.671
339
  }
340
  ],
341
  "logging_steps": 500,