csikasote committed
Commit 97e535d · verified · 1 Parent(s): d4c6ac5

End of training

README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
 license: cc-by-nc-4.0
 base_model: facebook/mms-1b-all
 tags:
+- automatic-speech-recognition
+- bigcgen
+- mms
 - generated_from_trainer
 metrics:
 - wer
@@ -16,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # mms-1b-bigcgen-male-30hrs-model
 
-This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
+This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the BIGCGEN - BEM dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4418
 - Wer: 0.4513
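
The commit also adds an MMS language-adapter checkpoint (adapter.bem.safetensors, below), so the fine-tuned model should be loadable through the standard MMS adapter API in transformers. The following is a minimal, untested sketch; the repository id `csikasote/mms-1b-bigcgen-male-30hrs-model` and the 16 kHz mono input are assumptions inferred from this commit, not confirmed by it.

```python
# Minimal sketch (assumed repo id, 16 kHz mono audio, "bem" adapter name).
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

repo_id = "csikasote/mms-1b-bigcgen-male-30hrs-model"  # assumption: inferred from author + model name

processor = AutoProcessor.from_pretrained(repo_id)
processor.tokenizer.set_target_lang("bem")  # matches adapter.bem.safetensors

model = Wav2Vec2ForCTC.from_pretrained(
    repo_id,
    target_lang="bem",             # load the Bemba adapter weights
    ignore_mismatched_sizes=True,  # fine-tuned vocabulary differs from the base facebook/mms-1b-all head
)

def transcribe(audio):
    # `audio` is a 1-D float array sampled at 16 kHz (e.g. loaded with librosa or torchaudio).
    inputs = processor(audio, sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    ids = torch.argmax(logits, dim=-1)[0]
    return processor.decode(ids)
```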
adapter.bem.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e306bf2581df8524d16ee3c945db59c4d8e3786079b61ee1e95d4e38b417de98
+size 8798532
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 1.0869790318405383,
+    "eval_loss": 0.4418056011199951,
+    "eval_runtime": 30.974,
+    "eval_samples": 441,
+    "eval_samples_per_second": 14.238,
+    "eval_steps_per_second": 3.584,
+    "eval_wer": 0.45131041115652804,
+    "total_flos": 1.1010996680275614e+19,
+    "train_loss": 1.948471661522275,
+    "train_runtime": 3045.3624,
+    "train_samples": 15450,
+    "train_samples_per_second": 152.199,
+    "train_steps_per_second": 19.022
+}
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 1.0869790318405383,
+    "eval_loss": 0.4418056011199951,
+    "eval_runtime": 30.974,
+    "eval_samples": 441,
+    "eval_samples_per_second": 14.238,
+    "eval_steps_per_second": 3.584,
+    "eval_wer": 0.45131041115652804
+}
runs/Dec28_01-44-16_srvrocgpu011.uct.ac.za/events.out.tfevents.1735346552.srvrocgpu011.uct.ac.za ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b05f5ea3e218b04561a72d13418c46d2029725f4c54bd53d41825c1671922561
+size 40
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 1.0869790318405383,
+    "total_flos": 1.1010996680275614e+19,
+    "train_loss": 1.948471661522275,
+    "train_runtime": 3045.3624,
+    "train_samples": 15450,
+    "train_samples_per_second": 152.199,
+    "train_steps_per_second": 19.022
+}
trainer_state.json ADDED
@@ -0,0 +1,387 @@
+{
+  "best_metric": 0.4401148855686188,
+  "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-male-30hrs-model/checkpoint-1700",
+  "epoch": 1.0869790318405383,
+  "eval_steps": 100,
+  "global_step": 2100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05177323323841574,
+      "grad_norm": 5.1029253005981445,
+      "learning_rate": 0.000285,
+      "loss": 14.8177,
+      "step": 100
+    },
+    {
+      "epoch": 0.05177323323841574,
+      "eval_loss": 0.9883018732070923,
+      "eval_runtime": 30.7186,
+      "eval_samples_per_second": 14.356,
+      "eval_steps_per_second": 3.613,
+      "eval_wer": 0.8360182736234671,
+      "step": 100
+    },
+    {
+      "epoch": 0.10354646647683148,
+      "grad_norm": 3.1198737621307373,
+      "learning_rate": 0.0002995071762061214,
+      "loss": 1.9066,
+      "step": 200
+    },
+    {
+      "epoch": 0.10354646647683148,
+      "eval_loss": 0.6110880374908447,
+      "eval_runtime": 30.676,
+      "eval_samples_per_second": 14.376,
+      "eval_steps_per_second": 3.618,
+      "eval_wer": 0.5657610002404424,
+      "step": 200
+    },
+    {
+      "epoch": 0.15531969971524723,
+      "grad_norm": 2.1564688682556152,
+      "learning_rate": 0.00029898841431782807,
+      "loss": 1.6259,
+      "step": 300
+    },
+    {
+      "epoch": 0.15531969971524723,
+      "eval_loss": 0.5918548107147217,
+      "eval_runtime": 30.4361,
+      "eval_samples_per_second": 14.489,
+      "eval_steps_per_second": 3.647,
+      "eval_wer": 0.5599903823034383,
+      "step": 300
+    },
+    {
+      "epoch": 0.20709293295366296,
+      "grad_norm": 2.736656665802002,
+      "learning_rate": 0.0002984696524295348,
+      "loss": 1.5344,
+      "step": 400
+    },
+    {
+      "epoch": 0.20709293295366296,
+      "eval_loss": 0.5518040657043457,
+      "eval_runtime": 30.3097,
+      "eval_samples_per_second": 14.55,
+      "eval_steps_per_second": 3.662,
+      "eval_wer": 0.5453233950468863,
+      "step": 400
+    },
+    {
+      "epoch": 0.2588661661920787,
+      "grad_norm": 7.7691874504089355,
+      "learning_rate": 0.00029795089054124156,
+      "loss": 1.5788,
+      "step": 500
+    },
+    {
+      "epoch": 0.2588661661920787,
+      "eval_loss": 0.5321589112281799,
+      "eval_runtime": 30.6676,
+      "eval_samples_per_second": 14.38,
+      "eval_steps_per_second": 3.619,
+      "eval_wer": 0.5417167588362587,
+      "step": 500
+    },
+    {
+      "epoch": 0.31063939943049446,
+      "grad_norm": 2.1344313621520996,
+      "learning_rate": 0.00029743212865294825,
+      "loss": 1.3816,
+      "step": 600
+    },
+    {
+      "epoch": 0.31063939943049446,
+      "eval_loss": 0.48904091119766235,
+      "eval_runtime": 30.416,
+      "eval_samples_per_second": 14.499,
+      "eval_steps_per_second": 3.649,
+      "eval_wer": 0.5234431353690792,
+      "step": 600
+    },
+    {
+      "epoch": 0.36241263266891016,
+      "grad_norm": 8.843093872070312,
+      "learning_rate": 0.000296913366764655,
+      "loss": 1.3242,
+      "step": 700
+    },
+    {
+      "epoch": 0.36241263266891016,
+      "eval_loss": 0.47984835505485535,
+      "eval_runtime": 30.416,
+      "eval_samples_per_second": 14.499,
+      "eval_steps_per_second": 3.649,
+      "eval_wer": 0.5032459725895648,
+      "step": 700
+    },
+    {
+      "epoch": 0.4141858659073259,
+      "grad_norm": 2.9696879386901855,
+      "learning_rate": 0.00029639460487636173,
+      "loss": 1.301,
+      "step": 800
+    },
+    {
+      "epoch": 0.4141858659073259,
+      "eval_loss": 0.4812551736831665,
+      "eval_runtime": 30.5011,
+      "eval_samples_per_second": 14.458,
+      "eval_steps_per_second": 3.639,
+      "eval_wer": 0.5159894205337822,
+      "step": 800
+    },
+    {
+      "epoch": 0.46595909914574163,
+      "grad_norm": 3.925156593322754,
+      "learning_rate": 0.0002958758429880684,
+      "loss": 1.1288,
+      "step": 900
+    },
+    {
+      "epoch": 0.46595909914574163,
+      "eval_loss": 0.463159441947937,
+      "eval_runtime": 30.6981,
+      "eval_samples_per_second": 14.366,
+      "eval_steps_per_second": 3.616,
+      "eval_wer": 0.4895407549891801,
+      "step": 900
+    },
+    {
+      "epoch": 0.5177323323841574,
+      "grad_norm": 5.512630462646484,
+      "learning_rate": 0.00029535708109977517,
+      "loss": 1.2779,
+      "step": 1000
+    },
+    {
+      "epoch": 0.5177323323841574,
+      "eval_loss": 0.46061971783638,
+      "eval_runtime": 30.6206,
+      "eval_samples_per_second": 14.402,
+      "eval_steps_per_second": 3.625,
+      "eval_wer": 0.48545323395046885,
+      "step": 1000
+    },
+    {
+      "epoch": 0.5695055656225732,
+      "grad_norm": 2.1363961696624756,
+      "learning_rate": 0.0002948383192114819,
+      "loss": 1.172,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5695055656225732,
+      "eval_loss": 0.45322486758232117,
+      "eval_runtime": 30.4814,
+      "eval_samples_per_second": 14.468,
+      "eval_steps_per_second": 3.642,
+      "eval_wer": 0.48352969463813417,
+      "step": 1100
+    },
+    {
+      "epoch": 0.6212787988609889,
+      "grad_norm": 3.3401312828063965,
+      "learning_rate": 0.0002943195573231886,
+      "loss": 1.2388,
+      "step": 1200
+    },
+    {
+      "epoch": 0.6212787988609889,
+      "eval_loss": 0.4610104262828827,
+      "eval_runtime": 30.4897,
+      "eval_samples_per_second": 14.464,
+      "eval_steps_per_second": 3.641,
+      "eval_wer": 0.4832892522240923,
+      "step": 1200
+    },
+    {
+      "epoch": 0.6730520320994046,
+      "grad_norm": 2.449324369430542,
+      "learning_rate": 0.00029380079543489535,
+      "loss": 1.2582,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6730520320994046,
+      "eval_loss": 0.4502464532852173,
+      "eval_runtime": 30.8643,
+      "eval_samples_per_second": 14.288,
+      "eval_steps_per_second": 3.596,
+      "eval_wer": 0.4666987256552056,
+      "step": 1300
+    },
+    {
+      "epoch": 0.7248252653378203,
+      "grad_norm": 1.9429669380187988,
+      "learning_rate": 0.0002932820335466021,
+      "loss": 1.1046,
+      "step": 1400
+    },
+    {
+      "epoch": 0.7248252653378203,
+      "eval_loss": 0.46078842878341675,
+      "eval_runtime": 30.6937,
+      "eval_samples_per_second": 14.368,
+      "eval_steps_per_second": 3.616,
+      "eval_wer": 0.4695840346237076,
+      "step": 1400
+    },
+    {
+      "epoch": 0.7765984985762361,
+      "grad_norm": 4.626898765563965,
+      "learning_rate": 0.00029276327165830883,
+      "loss": 1.2732,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7765984985762361,
+      "eval_loss": 0.4449756443500519,
+      "eval_runtime": 30.6809,
+      "eval_samples_per_second": 14.374,
+      "eval_steps_per_second": 3.618,
+      "eval_wer": 0.4695840346237076,
+      "step": 1500
+    },
+    {
+      "epoch": 0.8283717318146518,
+      "grad_norm": 1.7688357830047607,
+      "learning_rate": 0.0002922445097700155,
+      "loss": 1.2323,
+      "step": 1600
+    },
+    {
+      "epoch": 0.8283717318146518,
+      "eval_loss": 0.44945722818374634,
+      "eval_runtime": 30.5188,
+      "eval_samples_per_second": 14.45,
+      "eval_steps_per_second": 3.637,
+      "eval_wer": 0.463092089444578,
+      "step": 1600
+    },
+    {
+      "epoch": 0.8801449650530676,
+      "grad_norm": 3.03289794921875,
+      "learning_rate": 0.00029172574788172227,
+      "loss": 1.282,
+      "step": 1700
+    },
+    {
+      "epoch": 0.8801449650530676,
+      "eval_loss": 0.4401148855686188,
+      "eval_runtime": 30.7912,
+      "eval_samples_per_second": 14.322,
+      "eval_steps_per_second": 3.605,
+      "eval_wer": 0.4604472228901178,
+      "step": 1700
+    },
+    {
+      "epoch": 0.9319181982914833,
+      "grad_norm": 2.1416518688201904,
+      "learning_rate": 0.000291206985993429,
+      "loss": 1.1099,
+      "step": 1800
+    },
+    {
+      "epoch": 0.9319181982914833,
+      "eval_loss": 0.4439827501773834,
+      "eval_runtime": 30.5981,
+      "eval_samples_per_second": 14.413,
+      "eval_steps_per_second": 3.628,
+      "eval_wer": 0.46116855013224334,
+      "step": 1800
+    },
+    {
+      "epoch": 0.983691431529899,
+      "grad_norm": 3.6136231422424316,
+      "learning_rate": 0.0002906882241051357,
+      "loss": 1.1463,
+      "step": 1900
+    },
+    {
+      "epoch": 0.983691431529899,
+      "eval_loss": 0.44165703654289246,
+      "eval_runtime": 30.6823,
+      "eval_samples_per_second": 14.373,
+      "eval_steps_per_second": 3.618,
+      "eval_wer": 0.4688627073815821,
+      "step": 1900
+    },
+    {
+      "epoch": 1.0352057986021228,
+      "grad_norm": 2.6243932247161865,
+      "learning_rate": 0.00029016946221684244,
+      "loss": 1.12,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0352057986021228,
+      "eval_loss": 0.44080641865730286,
+      "eval_runtime": 30.5757,
+      "eval_samples_per_second": 14.423,
+      "eval_steps_per_second": 3.63,
+      "eval_wer": 0.45491704736715555,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0869790318405383,
+      "grad_norm": 3.724260091781616,
+      "learning_rate": 0.0002896507003285492,
+      "loss": 1.1037,
+      "step": 2100
+    },
+    {
+      "epoch": 1.0869790318405383,
+      "eval_loss": 0.4417917728424072,
+      "eval_runtime": 30.9792,
+      "eval_samples_per_second": 14.235,
+      "eval_steps_per_second": 3.583,
+      "eval_wer": 0.45131041115652804,
+      "step": 2100
+    },
+    {
+      "epoch": 1.0869790318405383,
+      "step": 2100,
+      "total_flos": 1.1010996680275614e+19,
+      "train_loss": 1.948471661522275,
+      "train_runtime": 3045.3624,
+      "train_samples_per_second": 152.199,
+      "train_steps_per_second": 19.022
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 57930,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 30,
+  "save_steps": 400,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 4,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 3
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1010996680275614e+19,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
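
The trainer_state.json above records the full evaluation curve (one log entry per 100 steps) together with the early-stopping state; note that `best_metric` and `best_model_checkpoint` track the lowest eval loss (0.4401 at step 1700), not WER. A minimal sketch for inspecting the log, assuming a local copy of the file:

```python
# Minimal sketch: summarise the eval curve recorded in trainer_state.json (assumes a local copy).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the log entries that contain an evaluation WER.
evals = [e for e in state["log_history"] if "eval_wer" in e]
for e in evals:
    print(f'step {e["step"]:>5}: wer={e["eval_wer"]:.4f}  loss={e["eval_loss"]:.4f}')

best = min(evals, key=lambda e: e["eval_wer"])
print("lowest eval WER:", best["eval_wer"], "at step", best["step"])
print("best checkpoint (selected by eval loss):", state["best_model_checkpoint"])
```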