csikasote committed
Commit fe6a75f · verified · 1 Parent(s): 396da8b

End of training

README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
 license: cc-by-nc-4.0
 base_model: facebook/mms-1b-all
 tags:
+- automatic-speech-recognition
+- bigcgen
+- mms
 - generated_from_trainer
 metrics:
 - wer
@@ -16,9 +19,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # mms-1b-bigcgen-male-5hrs-model
 
-This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
+This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the BIGCGEN - BEM dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4407
+- Loss: 0.4408
 - Wer: 0.4520
 
 ## Model description
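
The revised model card now names the dataset (BIGCGEN - BEM) and reports Loss 0.4408 / WER 0.4520, but it does not yet include a usage snippet. Below is a minimal, untested sketch of how an MMS adapter checkpoint like this is commonly loaded for inference with transformers; the hub repo id, the "bem" adapter language code (inferred from the adapter.bem.safetensors file added in this commit), and the dummy 16 kHz waveform are assumptions, not something this commit confirms.

```python
# Hedged sketch: load the fine-tuned MMS checkpoint and run greedy CTC decoding.
# The repo id and target_lang below are assumptions based on the file names in
# this commit; replace them with the actual values for your setup.
import numpy as np
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

repo_id = "csikasote/mms-1b-bigcgen-male-5hrs-model"  # assumed hub repo id

processor = AutoProcessor.from_pretrained(repo_id)
model = Wav2Vec2ForCTC.from_pretrained(
    repo_id,
    target_lang="bem",             # matches adapter.bem.safetensors
    ignore_mismatched_sizes=True,  # CTC head differs from facebook/mms-1b-all
)

# Replace this silent dummy clip with a real 16 kHz mono waveform (1-D float array).
speech = np.zeros(16_000, dtype=np.float32)
inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

pred_ids = torch.argmax(logits, dim=-1)[0]
print(processor.decode(pred_ids))
```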
adapter.bem.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9140743aad5dec810fbd36760fcca33e9cadf20736ddb460086e654ddac60d85
+size 8788284
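
The adapter weights are stored through Git LFS, so the diff above records only a pointer: the object's sha256 and its size in bytes (about 8.8 MB). A small sketch for confirming that a downloaded copy matches the pointer; the local file path is hypothetical.

```python
# Hedged sketch: verify a locally downloaded adapter file against the LFS pointer.
import hashlib
from pathlib import Path

path = Path("adapter.bem.safetensors")  # hypothetical local path
expected_sha256 = "9140743aad5dec810fbd36760fcca33e9cadf20736ddb460086e654ddac60d85"
expected_size = 8788284

data = path.read_bytes()
assert len(data) == expected_size, "size does not match the LFS pointer"
assert hashlib.sha256(data).hexdigest() == expected_sha256, "sha256 does not match"
print("adapter.bem.safetensors matches the LFS pointer")
```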
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 7.763975155279503,
+    "eval_loss": 0.4407535195350647,
+    "eval_runtime": 32.0903,
+    "eval_samples": 441,
+    "eval_samples_per_second": 13.742,
+    "eval_steps_per_second": 3.459,
+    "eval_wer": 0.4520317383986535,
+    "total_flos": 1.301164200639248e+19,
+    "train_loss": 1.6717456512451172,
+    "train_runtime": 3684.5977,
+    "train_samples": 2573,
+    "train_samples_per_second": 20.949,
+    "train_steps_per_second": 2.622
+}
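
The speed figures in this file are derived quantities rather than independent measurements: eval_samples_per_second is eval_samples divided by eval_runtime, and dividing samples-per-second by steps-per-second recovers the evaluation batch size. A quick sketch that recomputes them, assuming all_results.json sits in the working directory:

```python
# Hedged sketch: recompute the derived throughput figures from all_results.json.
import json

with open("all_results.json") as f:
    results = json.load(f)

samples_per_sec = results["eval_samples"] / results["eval_runtime"]
batch_size = results["eval_samples_per_second"] / results["eval_steps_per_second"]

print(f"eval samples/s: {samples_per_sec:.3f}")       # ~13.74, as reported
print(f"implied eval batch size: {batch_size:.1f}")   # ~4
```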
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 7.763975155279503,
+    "eval_loss": 0.4407535195350647,
+    "eval_runtime": 32.0903,
+    "eval_samples": 441,
+    "eval_samples_per_second": 13.742,
+    "eval_steps_per_second": 3.459,
+    "eval_wer": 0.4520317383986535
+}
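
The reported eval_wer of about 0.452 is the word error rate: the number of word-level substitutions, insertions, and deletions needed to turn the hypothesis into the reference, divided by the number of reference words. A minimal sketch of how this metric is typically computed with the evaluate library; the sentences below are made-up placeholders.

```python
# Hedged sketch: word error rate as typically computed in such training scripts,
# via the `evaluate` library (backed by jiwer). The sentences are placeholders.
import evaluate

wer_metric = evaluate.load("wer")

references = ["this is the reference transcript", "another reference sentence"]
predictions = ["this is the reference transcripts", "another sentence"]

wer = wer_metric.compute(predictions=predictions, references=references)
print(f"WER: {wer:.3f}")
```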
runs/Dec27_22-45-03_srvrocgpu011.uct.ac.za/events.out.tfevents.1735336448.srvrocgpu011.uct.ac.za ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6fd52e59cdb83c772e367b0e15e5061fe1d2307a768020897ab179d39861f56
+size 40
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 7.763975155279503,
+    "total_flos": 1.301164200639248e+19,
+    "train_loss": 1.6717456512451172,
+    "train_runtime": 3684.5977,
+    "train_samples": 2573,
+    "train_samples_per_second": 20.949,
+    "train_steps_per_second": 2.622
+}
trainer_state.json ADDED
@@ -0,0 +1,451 @@
+{
+  "best_metric": 0.4343813359737396,
+  "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-male-5hrs-model/checkpoint-2100",
+  "epoch": 7.763975155279503,
+  "eval_steps": 100,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3105590062111801,
+      "grad_norm": 7.037683486938477,
+      "learning_rate": 0.00028799999999999995,
+      "loss": 12.4451,
+      "step": 100
+    },
+    {
+      "epoch": 0.3105590062111801,
+      "eval_loss": 1.20354163646698,
+      "eval_runtime": 31.8229,
+      "eval_samples_per_second": 13.858,
+      "eval_steps_per_second": 3.488,
+      "eval_wer": 0.8328925222409232,
+      "step": 100
+    },
+    {
+      "epoch": 0.6211180124223602,
+      "grad_norm": 3.7343695163726807,
+      "learning_rate": 0.00029698744769874474,
+      "loss": 1.6342,
+      "step": 200
+    },
+    {
+      "epoch": 0.6211180124223602,
+      "eval_loss": 0.6174936890602112,
+      "eval_runtime": 31.7812,
+      "eval_samples_per_second": 13.876,
+      "eval_steps_per_second": 3.493,
+      "eval_wer": 0.5758595816301996,
+      "step": 200
+    },
+    {
+      "epoch": 0.9316770186335404,
+      "grad_norm": 4.2912797927856445,
+      "learning_rate": 0.0002938493723849372,
+      "loss": 1.5403,
+      "step": 300
+    },
+    {
+      "epoch": 0.9316770186335404,
+      "eval_loss": 0.5695027709007263,
+      "eval_runtime": 31.6004,
+      "eval_samples_per_second": 13.956,
+      "eval_steps_per_second": 3.513,
+      "eval_wer": 0.5534984371243087,
+      "step": 300
+    },
+    {
+      "epoch": 1.2422360248447206,
+      "grad_norm": 1.983912467956543,
+      "learning_rate": 0.00029071129707112966,
+      "loss": 1.3778,
+      "step": 400
+    },
+    {
+      "epoch": 1.2422360248447206,
+      "eval_loss": 0.5523655414581299,
+      "eval_runtime": 31.7017,
+      "eval_samples_per_second": 13.911,
+      "eval_steps_per_second": 3.501,
+      "eval_wer": 0.5359461408992546,
+      "step": 400
+    },
+    {
+      "epoch": 1.5527950310559007,
+      "grad_norm": 1.4964157342910767,
+      "learning_rate": 0.00028757322175732215,
+      "loss": 1.4572,
+      "step": 500
+    },
+    {
+      "epoch": 1.5527950310559007,
+      "eval_loss": 0.5301775336265564,
+      "eval_runtime": 32.0399,
+      "eval_samples_per_second": 13.764,
+      "eval_steps_per_second": 3.464,
+      "eval_wer": 0.5171916326039914,
+      "step": 500
+    },
+    {
+      "epoch": 1.8633540372670807,
+      "grad_norm": 3.8727850914001465,
+      "learning_rate": 0.0002844665271966527,
+      "loss": 1.4042,
+      "step": 600
+    },
+    {
+      "epoch": 1.8633540372670807,
+      "eval_loss": 0.5179265737533569,
+      "eval_runtime": 31.8425,
+      "eval_samples_per_second": 13.849,
+      "eval_steps_per_second": 3.486,
+      "eval_wer": 0.5265688867516229,
+      "step": 600
+    },
+    {
+      "epoch": 2.1739130434782608,
+      "grad_norm": 2.505019187927246,
+      "learning_rate": 0.0002813284518828452,
+      "loss": 1.4053,
+      "step": 700
+    },
+    {
+      "epoch": 2.1739130434782608,
+      "eval_loss": 0.5029146671295166,
+      "eval_runtime": 31.795,
+      "eval_samples_per_second": 13.87,
+      "eval_steps_per_second": 3.491,
+      "eval_wer": 0.5143063236354893,
+      "step": 700
+    },
+    {
+      "epoch": 2.4844720496894412,
+      "grad_norm": 2.7380497455596924,
+      "learning_rate": 0.0002781903765690376,
+      "loss": 1.2782,
+      "step": 800
+    },
+    {
+      "epoch": 2.4844720496894412,
+      "eval_loss": 0.4700861871242523,
+      "eval_runtime": 32.0844,
+      "eval_samples_per_second": 13.745,
+      "eval_steps_per_second": 3.46,
+      "eval_wer": 0.4864150036066362,
+      "step": 800
+    },
+    {
+      "epoch": 2.795031055900621,
+      "grad_norm": 1.6105161905288696,
+      "learning_rate": 0.0002750523012552301,
+      "loss": 1.2541,
+      "step": 900
+    },
+    {
+      "epoch": 2.795031055900621,
+      "eval_loss": 0.4585192799568176,
+      "eval_runtime": 32.0616,
+      "eval_samples_per_second": 13.755,
+      "eval_steps_per_second": 3.462,
+      "eval_wer": 0.48665544602067806,
+      "step": 900
+    },
+    {
+      "epoch": 3.1055900621118013,
+      "grad_norm": 3.0428338050842285,
+      "learning_rate": 0.0002719142259414226,
+      "loss": 1.1672,
+      "step": 1000
+    },
+    {
+      "epoch": 3.1055900621118013,
+      "eval_loss": 0.47279417514801025,
+      "eval_runtime": 31.7171,
+      "eval_samples_per_second": 13.904,
+      "eval_steps_per_second": 3.5,
+      "eval_wer": 0.4861745611925944,
+      "step": 1000
+    },
+    {
+      "epoch": 3.4161490683229814,
+      "grad_norm": 1.6724671125411987,
+      "learning_rate": 0.00026877615062761507,
+      "loss": 1.1205,
+      "step": 1100
+    },
+    {
+      "epoch": 3.4161490683229814,
+      "eval_loss": 0.4558369219303131,
+      "eval_runtime": 31.7372,
+      "eval_samples_per_second": 13.895,
+      "eval_steps_per_second": 3.497,
+      "eval_wer": 0.4794421735994229,
+      "step": 1100
+    },
+    {
+      "epoch": 3.7267080745341614,
+      "grad_norm": 4.677685260772705,
+      "learning_rate": 0.0002656380753138075,
+      "loss": 1.1699,
+      "step": 1200
+    },
+    {
+      "epoch": 3.7267080745341614,
+      "eval_loss": 0.4520164430141449,
+      "eval_runtime": 31.9065,
+      "eval_samples_per_second": 13.822,
+      "eval_steps_per_second": 3.479,
+      "eval_wer": 0.4811252704977158,
+      "step": 1200
+    },
+    {
+      "epoch": 4.037267080745342,
+      "grad_norm": 15.624258995056152,
+      "learning_rate": 0.0002625,
+      "loss": 1.2418,
+      "step": 1300
+    },
+    {
+      "epoch": 4.037267080745342,
+      "eval_loss": 0.4494916796684265,
+      "eval_runtime": 32.133,
+      "eval_samples_per_second": 13.724,
+      "eval_steps_per_second": 3.454,
+      "eval_wer": 0.4751142101466699,
+      "step": 1300
+    },
+    {
+      "epoch": 4.3478260869565215,
+      "grad_norm": 1.0827780961990356,
+      "learning_rate": 0.00025936192468619247,
+      "loss": 1.071,
+      "step": 1400
+    },
+    {
+      "epoch": 4.3478260869565215,
+      "eval_loss": 0.4487316310405731,
+      "eval_runtime": 31.845,
+      "eval_samples_per_second": 13.848,
+      "eval_steps_per_second": 3.486,
+      "eval_wer": 0.47367155566241886,
+      "step": 1400
+    },
+    {
+      "epoch": 4.658385093167702,
+      "grad_norm": 2.8134868144989014,
+      "learning_rate": 0.0002562238493723849,
+      "loss": 1.078,
+      "step": 1500
+    },
+    {
+      "epoch": 4.658385093167702,
+      "eval_loss": 0.4445899724960327,
+      "eval_runtime": 31.7402,
+      "eval_samples_per_second": 13.894,
+      "eval_steps_per_second": 3.497,
+      "eval_wer": 0.4760759798028372,
+      "step": 1500
+    },
+    {
+      "epoch": 4.9689440993788825,
+      "grad_norm": 3.0363829135894775,
+      "learning_rate": 0.0002530857740585774,
+      "loss": 1.2474,
+      "step": 1600
+    },
+    {
+      "epoch": 4.9689440993788825,
+      "eval_loss": 0.4437084496021271,
+      "eval_runtime": 32.0173,
+      "eval_samples_per_second": 13.774,
+      "eval_steps_per_second": 3.467,
+      "eval_wer": 0.46261120461649435,
+      "step": 1600
+    },
+    {
+      "epoch": 5.279503105590062,
+      "grad_norm": 1.907182216644287,
+      "learning_rate": 0.0002499476987447699,
+      "loss": 1.1127,
+      "step": 1700
+    },
+    {
+      "epoch": 5.279503105590062,
+      "eval_loss": 0.43802064657211304,
+      "eval_runtime": 32.2846,
+      "eval_samples_per_second": 13.66,
+      "eval_steps_per_second": 3.438,
+      "eval_wer": 0.46573695599903825,
+      "step": 1700
+    },
+    {
+      "epoch": 5.590062111801243,
+      "grad_norm": 4.59730339050293,
+      "learning_rate": 0.0002468096234309623,
+      "loss": 1.1761,
+      "step": 1800
+    },
+    {
+      "epoch": 5.590062111801243,
+      "eval_loss": 0.44803521037101746,
+      "eval_runtime": 31.8849,
+      "eval_samples_per_second": 13.831,
+      "eval_steps_per_second": 3.481,
+      "eval_wer": 0.4674200528973311,
+      "step": 1800
+    },
+    {
+      "epoch": 5.900621118012422,
+      "grad_norm": 2.9201653003692627,
+      "learning_rate": 0.0002436715481171548,
+      "loss": 1.0997,
+      "step": 1900
+    },
+    {
+      "epoch": 5.900621118012422,
+      "eval_loss": 0.4469524919986725,
+      "eval_runtime": 31.9803,
+      "eval_samples_per_second": 13.79,
+      "eval_steps_per_second": 3.471,
+      "eval_wer": 0.4652560711709546,
+      "step": 1900
+    },
+    {
+      "epoch": 6.211180124223603,
+      "grad_norm": 2.5859551429748535,
+      "learning_rate": 0.00024053347280334725,
+      "loss": 1.1203,
+      "step": 2000
+    },
+    {
+      "epoch": 6.211180124223603,
+      "eval_loss": 0.44207584857940674,
+      "eval_runtime": 32.0266,
+      "eval_samples_per_second": 13.77,
+      "eval_steps_per_second": 3.466,
+      "eval_wer": 0.46140899254628515,
+      "step": 2000
+    },
+    {
+      "epoch": 6.521739130434782,
+      "grad_norm": 2.423842191696167,
+      "learning_rate": 0.00023739539748953974,
+      "loss": 1.0749,
+      "step": 2100
+    },
+    {
+      "epoch": 6.521739130434782,
+      "eval_loss": 0.4343813359737396,
+      "eval_runtime": 32.257,
+      "eval_samples_per_second": 13.671,
+      "eval_steps_per_second": 3.441,
+      "eval_wer": 0.4505890839144025,
+      "step": 2100
+    },
+    {
+      "epoch": 6.832298136645963,
+      "grad_norm": 8.447519302368164,
+      "learning_rate": 0.0002342573221757322,
+      "loss": 1.1156,
+      "step": 2200
+    },
+    {
+      "epoch": 6.832298136645963,
+      "eval_loss": 0.43543142080307007,
+      "eval_runtime": 31.9424,
+      "eval_samples_per_second": 13.806,
+      "eval_steps_per_second": 3.475,
+      "eval_wer": 0.45106996874248617,
+      "step": 2200
+    },
+    {
+      "epoch": 7.142857142857143,
+      "grad_norm": 2.308629274368286,
+      "learning_rate": 0.00023111924686192465,
+      "loss": 1.0404,
+      "step": 2300
+    },
+    {
+      "epoch": 7.142857142857143,
+      "eval_loss": 0.43639999628067017,
+      "eval_runtime": 32.0384,
+      "eval_samples_per_second": 13.765,
+      "eval_steps_per_second": 3.465,
+      "eval_wer": 0.45347439288290453,
+      "step": 2300
+    },
+    {
+      "epoch": 7.453416149068323,
+      "grad_norm": 1.2582736015319824,
+      "learning_rate": 0.00022798117154811714,
+      "loss": 1.1081,
+      "step": 2400
+    },
+    {
+      "epoch": 7.453416149068323,
+      "eval_loss": 0.43768569827079773,
+      "eval_runtime": 31.9331,
+      "eval_samples_per_second": 13.81,
+      "eval_steps_per_second": 3.476,
+      "eval_wer": 0.45155085357056984,
+      "step": 2400
+    },
+    {
+      "epoch": 7.763975155279503,
+      "grad_norm": 21.993574142456055,
+      "learning_rate": 0.0002248430962343096,
+      "loss": 1.0535,
+      "step": 2500
+    },
+    {
+      "epoch": 7.763975155279503,
+      "eval_loss": 0.44070157408714294,
+      "eval_runtime": 32.2891,
+      "eval_samples_per_second": 13.658,
+      "eval_steps_per_second": 3.438,
+      "eval_wer": 0.4520317383986535,
+      "step": 2500
+    },
+    {
+      "epoch": 7.763975155279503,
+      "step": 2500,
+      "total_flos": 1.301164200639248e+19,
+      "train_loss": 1.6717456512451172,
+      "train_runtime": 3684.5977,
+      "train_samples_per_second": 20.949,
+      "train_steps_per_second": 2.622
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 9660,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 30,
+  "save_steps": 400,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 4,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 3
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.301164200639248e+19,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
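
trainer_state.json keeps the full training/evaluation trace in log_history (one training entry and one evaluation entry every 100 steps), which makes it easy to recover the learning curve and to confirm the best checkpoint. A short sketch, assuming the file sits in the working directory:

```python
# Hedged sketch: pull the evaluation curve out of trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries; the training and summary entries lack eval_loss.
eval_rows = [e for e in state["log_history"] if "eval_loss" in e]
for row in eval_rows:
    print(f'step {row["step"]:>5}  eval_loss {row["eval_loss"]:.4f}  eval_wer {row["eval_wer"]:.4f}')

best = min(eval_rows, key=lambda e: e["eval_loss"])
# Should agree with best_metric / best_model_checkpoint (checkpoint-2100 above).
print("best eval_loss:", best["eval_loss"], "at step", best["step"])
```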