infinitejoy committed on
Commit
65d8195
·
1 Parent(s): 0d12d1f

End of training

Browse files
Files changed (6) hide show
  1. all_results.json +14 -0
  2. eval.py +137 -0
  3. eval_results.json +9 -0
  4. pytorch_model.bin +1 -1
  5. train_results.json +8 -0
  6. trainer_state.json +829 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.17219851911067963,
4
+ "eval_runtime": 164.1427,
5
+ "eval_samples": 3431,
6
+ "eval_samples_per_second": 20.903,
7
+ "eval_steps_per_second": 20.903,
8
+ "eval_wer": 0.24859002169197397,
9
+ "train_loss": 1.3891823223876953,
10
+ "train_runtime": 18203.5679,
11
+ "train_samples": 7989,
12
+ "train_samples_per_second": 21.944,
13
+ "train_steps_per_second": 0.687
14
+ }
eval.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import re
4
+ from typing import Dict
5
+
6
+ import torch
7
+ from datasets import Audio, Dataset, load_dataset, load_metric
8
+
9
+ from transformers import AutoFeatureExtractor, pipeline
10
+
11
+
12
+ def log_results(result: Dataset, args: Dict[str, str]):
13
+ """DO NOT CHANGE. This function computes and logs the result metrics."""
14
+
15
+ log_outputs = args.log_outputs
16
+ dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])
17
+
18
+ # load metric
19
+ wer = load_metric("wer")
20
+ cer = load_metric("cer")
21
+
22
+ # compute metrics
23
+ wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
24
+ cer_result = cer.compute(references=result["target"], predictions=result["prediction"])
25
+
26
+ # print & log results
27
+ result_str = f"WER: {wer_result}\n" f"CER: {cer_result}"
28
+ print(result_str)
29
+
30
+ with open(f"{dataset_id}_eval_results.txt", "w") as f:
31
+ f.write(result_str)
32
+
33
+ # log all results in text file. Possibly interesting for analysis
34
+ if log_outputs is not None:
35
+ pred_file = f"log_{dataset_id}_predictions.txt"
36
+ target_file = f"log_{dataset_id}_targets.txt"
37
+
38
+ with open(pred_file, "w") as p, open(target_file, "w") as t:
39
+
40
+ # mapping function to write output
41
+ def write_to_file(batch, i):
42
+ p.write(f"{i}" + "\n")
43
+ p.write(batch["prediction"] + "\n")
44
+ t.write(f"{i}" + "\n")
45
+ t.write(batch["target"] + "\n")
46
+
47
+ result.map(write_to_file, with_indices=True)
48
+
49
+
50
+ def normalize_text(text: str) -> str:
51
+ """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
52
+
53
+ chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]' # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
54
+
55
+ text = re.sub(chars_to_ignore_regex, "", text.lower())
56
+
57
+ # In addition, we can normalize the target text, e.g. removing new lines characters etc...
58
+ # note that order is important here!
59
+ token_sequences_to_ignore = ["\n\n", "\n", " ", " "]
60
+
61
+ for t in token_sequences_to_ignore:
62
+ text = " ".join(text.split(t))
63
+
64
+ return text
65
+
66
+
67
+ def main(args):
68
+ # load dataset
69
+ dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
70
+
71
+ # for testing: only process the first ten examples as a test
72
+ # dataset = dataset.select(range(10))
73
+
74
+ # load processor
75
+ feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
76
+ sampling_rate = feature_extractor.sampling_rate
77
+
78
+ # resample audio
79
+ dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))
80
+
81
+ # load eval pipeline
82
+ if args.device is None:
83
+ args.device = 0 if torch.cuda.is_available() else -1
84
+ asr = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)
85
+
86
+ # map function to decode audio
87
+ def map_to_pred(batch):
88
+ prediction = asr(
89
+ batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s
90
+ )
91
+
92
+ batch["prediction"] = prediction["text"]
93
+ batch["target"] = normalize_text(batch["sentence"])
94
+ return batch
95
+
96
+ # run inference on all examples
97
+ result = dataset.map(map_to_pred, remove_columns=dataset.column_names)
98
+
99
+ # compute and log_results
100
+ # do not change function below
101
+ log_results(result, args)
102
+
103
+
104
+ if __name__ == "__main__":
105
+ parser = argparse.ArgumentParser()
106
+
107
+ parser.add_argument(
108
+ "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
109
+ )
110
+ parser.add_argument(
111
+ "--dataset",
112
+ type=str,
113
+ required=True,
114
+ help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
115
+ )
116
+ parser.add_argument(
117
+ "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
118
+ )
119
+ parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
120
+ parser.add_argument(
121
+ "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to 5 seconds."
122
+ )
123
+ parser.add_argument(
124
+ "--stride_length_s", type=float, default=None, help="Stride of the audio chunks. Defaults to 1 second."
125
+ )
126
+ parser.add_argument(
127
+ "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
128
+ )
129
+ parser.add_argument(
130
+ "--device",
131
+ type=int,
132
+ default=None,
133
+ help="The device to run the pipeline on. -1 for CPU (default), 0 for the first GPU and so on.",
134
+ )
135
+ args = parser.parse_args()
136
+
137
+ main(args)
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.17219851911067963,
4
+ "eval_runtime": 164.1427,
5
+ "eval_samples": 3431,
6
+ "eval_samples_per_second": 20.903,
7
+ "eval_steps_per_second": 20.903,
8
+ "eval_wer": 0.24859002169197397
9
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db9fe401ae31d8395b6c85d7bcaed07509933e73acbdf250b77dd1bd223f3b52
3
  size 1262091761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d20bad08b17a254665e11c7ac0712e0d91963613aafcbaed4f5be42438f5800
3
  size 1262091761
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "train_loss": 1.3891823223876953,
4
+ "train_runtime": 18203.5679,
5
+ "train_samples": 7989,
6
+ "train_samples_per_second": 21.944,
7
+ "train_steps_per_second": 0.687
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,829 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 50.0,
5
+ "global_step": 12500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.4,
12
+ "learning_rate": 3.4299999999999998e-06,
13
+ "loss": 11.1542,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.8,
18
+ "learning_rate": 6.93e-06,
19
+ "loss": 6.4515,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 1.2,
24
+ "learning_rate": 1.0429999999999998e-05,
25
+ "loss": 4.2451,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 1.6,
30
+ "learning_rate": 1.3929999999999999e-05,
31
+ "loss": 3.7284,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "learning_rate": 1.7429999999999997e-05,
37
+ "loss": 3.4154,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 2.4,
42
+ "learning_rate": 2.0929999999999998e-05,
43
+ "loss": 3.2212,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 2.8,
48
+ "learning_rate": 2.4429999999999995e-05,
49
+ "loss": 3.1286,
50
+ "step": 700
51
+ },
52
+ {
53
+ "epoch": 3.2,
54
+ "learning_rate": 2.793e-05,
55
+ "loss": 3.0927,
56
+ "step": 800
57
+ },
58
+ {
59
+ "epoch": 3.6,
60
+ "learning_rate": 3.1429999999999996e-05,
61
+ "loss": 3.0432,
62
+ "step": 900
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "learning_rate": 3.493e-05,
67
+ "loss": 3.0182,
68
+ "step": 1000
69
+ },
70
+ {
71
+ "epoch": 4.4,
72
+ "learning_rate": 3.843e-05,
73
+ "loss": 2.9412,
74
+ "step": 1100
75
+ },
76
+ {
77
+ "epoch": 4.8,
78
+ "learning_rate": 4.192999999999999e-05,
79
+ "loss": 2.8506,
80
+ "step": 1200
81
+ },
82
+ {
83
+ "epoch": 5.2,
84
+ "learning_rate": 4.543e-05,
85
+ "loss": 2.8052,
86
+ "step": 1300
87
+ },
88
+ {
89
+ "epoch": 5.6,
90
+ "learning_rate": 4.8929999999999994e-05,
91
+ "loss": 2.7483,
92
+ "step": 1400
93
+ },
94
+ {
95
+ "epoch": 6.0,
96
+ "learning_rate": 5.243e-05,
97
+ "loss": 2.562,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 6.4,
102
+ "learning_rate": 5.593e-05,
103
+ "loss": 2.1552,
104
+ "step": 1600
105
+ },
106
+ {
107
+ "epoch": 6.8,
108
+ "learning_rate": 5.942999999999999e-05,
109
+ "loss": 1.94,
110
+ "step": 1700
111
+ },
112
+ {
113
+ "epoch": 7.2,
114
+ "learning_rate": 6.293e-05,
115
+ "loss": 1.8211,
116
+ "step": 1800
117
+ },
118
+ {
119
+ "epoch": 7.6,
120
+ "learning_rate": 6.642999999999999e-05,
121
+ "loss": 1.7377,
122
+ "step": 1900
123
+ },
124
+ {
125
+ "epoch": 8.0,
126
+ "learning_rate": 6.992999999999999e-05,
127
+ "loss": 1.6837,
128
+ "step": 2000
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "eval_loss": 0.6648585200309753,
133
+ "eval_runtime": 162.4473,
134
+ "eval_samples_per_second": 21.121,
135
+ "eval_steps_per_second": 21.121,
136
+ "eval_wer": 0.7515401301518438,
137
+ "step": 2000
138
+ },
139
+ {
140
+ "epoch": 8.4,
141
+ "learning_rate": 6.934666666666666e-05,
142
+ "loss": 1.6398,
143
+ "step": 2100
144
+ },
145
+ {
146
+ "epoch": 8.8,
147
+ "learning_rate": 6.867999999999999e-05,
148
+ "loss": 1.5742,
149
+ "step": 2200
150
+ },
151
+ {
152
+ "epoch": 9.2,
153
+ "learning_rate": 6.801333333333332e-05,
154
+ "loss": 1.5003,
155
+ "step": 2300
156
+ },
157
+ {
158
+ "epoch": 9.6,
159
+ "learning_rate": 6.734666666666666e-05,
160
+ "loss": 1.4354,
161
+ "step": 2400
162
+ },
163
+ {
164
+ "epoch": 10.0,
165
+ "learning_rate": 6.667999999999999e-05,
166
+ "loss": 1.3654,
167
+ "step": 2500
168
+ },
169
+ {
170
+ "epoch": 10.4,
171
+ "learning_rate": 6.601333333333333e-05,
172
+ "loss": 1.3276,
173
+ "step": 2600
174
+ },
175
+ {
176
+ "epoch": 10.8,
177
+ "learning_rate": 6.534666666666666e-05,
178
+ "loss": 1.278,
179
+ "step": 2700
180
+ },
181
+ {
182
+ "epoch": 11.2,
183
+ "learning_rate": 6.468e-05,
184
+ "loss": 1.2794,
185
+ "step": 2800
186
+ },
187
+ {
188
+ "epoch": 11.6,
189
+ "learning_rate": 6.401333333333332e-05,
190
+ "loss": 1.2504,
191
+ "step": 2900
192
+ },
193
+ {
194
+ "epoch": 12.0,
195
+ "learning_rate": 6.334666666666667e-05,
196
+ "loss": 1.2293,
197
+ "step": 3000
198
+ },
199
+ {
200
+ "epoch": 12.4,
201
+ "learning_rate": 6.268e-05,
202
+ "loss": 1.2079,
203
+ "step": 3100
204
+ },
205
+ {
206
+ "epoch": 12.8,
207
+ "learning_rate": 6.201333333333332e-05,
208
+ "loss": 1.1966,
209
+ "step": 3200
210
+ },
211
+ {
212
+ "epoch": 13.2,
213
+ "learning_rate": 6.134666666666666e-05,
214
+ "loss": 1.1706,
215
+ "step": 3300
216
+ },
217
+ {
218
+ "epoch": 13.6,
219
+ "learning_rate": 6.0679999999999995e-05,
220
+ "loss": 1.1587,
221
+ "step": 3400
222
+ },
223
+ {
224
+ "epoch": 14.0,
225
+ "learning_rate": 6.0013333333333323e-05,
226
+ "loss": 1.1514,
227
+ "step": 3500
228
+ },
229
+ {
230
+ "epoch": 14.4,
231
+ "learning_rate": 5.934666666666666e-05,
232
+ "loss": 1.1437,
233
+ "step": 3600
234
+ },
235
+ {
236
+ "epoch": 14.8,
237
+ "learning_rate": 5.8679999999999994e-05,
238
+ "loss": 1.137,
239
+ "step": 3700
240
+ },
241
+ {
242
+ "epoch": 15.2,
243
+ "learning_rate": 5.801333333333333e-05,
244
+ "loss": 1.1301,
245
+ "step": 3800
246
+ },
247
+ {
248
+ "epoch": 15.6,
249
+ "learning_rate": 5.734666666666666e-05,
250
+ "loss": 1.108,
251
+ "step": 3900
252
+ },
253
+ {
254
+ "epoch": 16.0,
255
+ "learning_rate": 5.668666666666666e-05,
256
+ "loss": 1.1105,
257
+ "step": 4000
258
+ },
259
+ {
260
+ "epoch": 16.0,
261
+ "eval_loss": 0.23862487077713013,
262
+ "eval_runtime": 163.3024,
263
+ "eval_samples_per_second": 21.01,
264
+ "eval_steps_per_second": 21.01,
265
+ "eval_wer": 0.3436008676789588,
266
+ "step": 4000
267
+ },
268
+ {
269
+ "epoch": 16.4,
270
+ "learning_rate": 5.6019999999999996e-05,
271
+ "loss": 1.0962,
272
+ "step": 4100
273
+ },
274
+ {
275
+ "epoch": 16.8,
276
+ "learning_rate": 5.535333333333333e-05,
277
+ "loss": 1.0979,
278
+ "step": 4200
279
+ },
280
+ {
281
+ "epoch": 17.2,
282
+ "learning_rate": 5.4686666666666666e-05,
283
+ "loss": 1.0898,
284
+ "step": 4300
285
+ },
286
+ {
287
+ "epoch": 17.6,
288
+ "learning_rate": 5.4019999999999994e-05,
289
+ "loss": 1.0906,
290
+ "step": 4400
291
+ },
292
+ {
293
+ "epoch": 18.0,
294
+ "learning_rate": 5.335333333333333e-05,
295
+ "loss": 1.0685,
296
+ "step": 4500
297
+ },
298
+ {
299
+ "epoch": 18.4,
300
+ "learning_rate": 5.2686666666666665e-05,
301
+ "loss": 1.0622,
302
+ "step": 4600
303
+ },
304
+ {
305
+ "epoch": 18.8,
306
+ "learning_rate": 5.201999999999999e-05,
307
+ "loss": 1.0662,
308
+ "step": 4700
309
+ },
310
+ {
311
+ "epoch": 19.2,
312
+ "learning_rate": 5.135333333333333e-05,
313
+ "loss": 1.064,
314
+ "step": 4800
315
+ },
316
+ {
317
+ "epoch": 19.6,
318
+ "learning_rate": 5.0686666666666664e-05,
319
+ "loss": 1.0674,
320
+ "step": 4900
321
+ },
322
+ {
323
+ "epoch": 20.0,
324
+ "learning_rate": 5.001999999999999e-05,
325
+ "loss": 1.0565,
326
+ "step": 5000
327
+ },
328
+ {
329
+ "epoch": 20.4,
330
+ "learning_rate": 4.935333333333333e-05,
331
+ "loss": 1.0284,
332
+ "step": 5100
333
+ },
334
+ {
335
+ "epoch": 20.8,
336
+ "learning_rate": 4.868666666666666e-05,
337
+ "loss": 1.0432,
338
+ "step": 5200
339
+ },
340
+ {
341
+ "epoch": 21.2,
342
+ "learning_rate": 4.802e-05,
343
+ "loss": 1.0276,
344
+ "step": 5300
345
+ },
346
+ {
347
+ "epoch": 21.6,
348
+ "learning_rate": 4.7353333333333326e-05,
349
+ "loss": 1.0301,
350
+ "step": 5400
351
+ },
352
+ {
353
+ "epoch": 22.0,
354
+ "learning_rate": 4.668666666666666e-05,
355
+ "loss": 1.0273,
356
+ "step": 5500
357
+ },
358
+ {
359
+ "epoch": 22.4,
360
+ "learning_rate": 4.6019999999999996e-05,
361
+ "loss": 1.0306,
362
+ "step": 5600
363
+ },
364
+ {
365
+ "epoch": 22.8,
366
+ "learning_rate": 4.5353333333333325e-05,
367
+ "loss": 1.0204,
368
+ "step": 5700
369
+ },
370
+ {
371
+ "epoch": 23.2,
372
+ "learning_rate": 4.468666666666666e-05,
373
+ "loss": 1.0015,
374
+ "step": 5800
375
+ },
376
+ {
377
+ "epoch": 23.6,
378
+ "learning_rate": 4.4019999999999995e-05,
379
+ "loss": 1.0053,
380
+ "step": 5900
381
+ },
382
+ {
383
+ "epoch": 24.0,
384
+ "learning_rate": 4.3353333333333324e-05,
385
+ "loss": 1.0069,
386
+ "step": 6000
387
+ },
388
+ {
389
+ "epoch": 24.0,
390
+ "eval_loss": 0.20076848566532135,
391
+ "eval_runtime": 163.4066,
392
+ "eval_samples_per_second": 20.997,
393
+ "eval_steps_per_second": 20.997,
394
+ "eval_wer": 0.29683297180043383,
395
+ "step": 6000
396
+ },
397
+ {
398
+ "epoch": 24.4,
399
+ "learning_rate": 4.2686666666666666e-05,
400
+ "loss": 0.9962,
401
+ "step": 6100
402
+ },
403
+ {
404
+ "epoch": 24.8,
405
+ "learning_rate": 4.202e-05,
406
+ "loss": 1.0081,
407
+ "step": 6200
408
+ },
409
+ {
410
+ "epoch": 25.2,
411
+ "learning_rate": 4.136e-05,
412
+ "loss": 1.0055,
413
+ "step": 6300
414
+ },
415
+ {
416
+ "epoch": 25.6,
417
+ "learning_rate": 4.069333333333333e-05,
418
+ "loss": 0.9909,
419
+ "step": 6400
420
+ },
421
+ {
422
+ "epoch": 26.0,
423
+ "learning_rate": 4.002666666666666e-05,
424
+ "loss": 0.9936,
425
+ "step": 6500
426
+ },
427
+ {
428
+ "epoch": 26.4,
429
+ "learning_rate": 3.9359999999999996e-05,
430
+ "loss": 0.9702,
431
+ "step": 6600
432
+ },
433
+ {
434
+ "epoch": 26.8,
435
+ "learning_rate": 3.869333333333333e-05,
436
+ "loss": 0.9748,
437
+ "step": 6700
438
+ },
439
+ {
440
+ "epoch": 27.2,
441
+ "learning_rate": 3.802666666666666e-05,
442
+ "loss": 0.9923,
443
+ "step": 6800
444
+ },
445
+ {
446
+ "epoch": 27.6,
447
+ "learning_rate": 3.736666666666667e-05,
448
+ "loss": 0.976,
449
+ "step": 6900
450
+ },
451
+ {
452
+ "epoch": 28.0,
453
+ "learning_rate": 3.669999999999999e-05,
454
+ "loss": 0.9792,
455
+ "step": 7000
456
+ },
457
+ {
458
+ "epoch": 28.4,
459
+ "learning_rate": 3.603333333333333e-05,
460
+ "loss": 0.9667,
461
+ "step": 7100
462
+ },
463
+ {
464
+ "epoch": 28.8,
465
+ "learning_rate": 3.536666666666667e-05,
466
+ "loss": 0.9686,
467
+ "step": 7200
468
+ },
469
+ {
470
+ "epoch": 29.2,
471
+ "learning_rate": 3.4699999999999996e-05,
472
+ "loss": 0.9592,
473
+ "step": 7300
474
+ },
475
+ {
476
+ "epoch": 29.6,
477
+ "learning_rate": 3.403333333333333e-05,
478
+ "loss": 0.9512,
479
+ "step": 7400
480
+ },
481
+ {
482
+ "epoch": 30.0,
483
+ "learning_rate": 3.336666666666667e-05,
484
+ "loss": 0.9624,
485
+ "step": 7500
486
+ },
487
+ {
488
+ "epoch": 30.4,
489
+ "learning_rate": 3.2699999999999995e-05,
490
+ "loss": 0.9581,
491
+ "step": 7600
492
+ },
493
+ {
494
+ "epoch": 30.8,
495
+ "learning_rate": 3.203333333333333e-05,
496
+ "loss": 0.9421,
497
+ "step": 7700
498
+ },
499
+ {
500
+ "epoch": 31.2,
501
+ "learning_rate": 3.1366666666666666e-05,
502
+ "loss": 0.9468,
503
+ "step": 7800
504
+ },
505
+ {
506
+ "epoch": 31.6,
507
+ "learning_rate": 3.0699999999999994e-05,
508
+ "loss": 0.9411,
509
+ "step": 7900
510
+ },
511
+ {
512
+ "epoch": 32.0,
513
+ "learning_rate": 3.0033333333333333e-05,
514
+ "loss": 0.9417,
515
+ "step": 8000
516
+ },
517
+ {
518
+ "epoch": 32.0,
519
+ "eval_loss": 0.19149591028690338,
520
+ "eval_runtime": 163.3845,
521
+ "eval_samples_per_second": 21.0,
522
+ "eval_steps_per_second": 21.0,
523
+ "eval_wer": 0.2774403470715835,
524
+ "step": 8000
525
+ },
526
+ {
527
+ "epoch": 32.4,
528
+ "learning_rate": 2.9366666666666664e-05,
529
+ "loss": 0.9303,
530
+ "step": 8100
531
+ },
532
+ {
533
+ "epoch": 32.8,
534
+ "learning_rate": 2.8699999999999996e-05,
535
+ "loss": 0.9484,
536
+ "step": 8200
537
+ },
538
+ {
539
+ "epoch": 33.2,
540
+ "learning_rate": 2.803333333333333e-05,
541
+ "loss": 0.93,
542
+ "step": 8300
543
+ },
544
+ {
545
+ "epoch": 33.6,
546
+ "learning_rate": 2.7366666666666663e-05,
547
+ "loss": 0.9313,
548
+ "step": 8400
549
+ },
550
+ {
551
+ "epoch": 34.0,
552
+ "learning_rate": 2.67e-05,
553
+ "loss": 0.9365,
554
+ "step": 8500
555
+ },
556
+ {
557
+ "epoch": 34.4,
558
+ "learning_rate": 2.603333333333333e-05,
559
+ "loss": 0.9266,
560
+ "step": 8600
561
+ },
562
+ {
563
+ "epoch": 34.8,
564
+ "learning_rate": 2.5366666666666662e-05,
565
+ "loss": 0.9197,
566
+ "step": 8700
567
+ },
568
+ {
569
+ "epoch": 35.2,
570
+ "learning_rate": 2.4699999999999997e-05,
571
+ "loss": 0.9325,
572
+ "step": 8800
573
+ },
574
+ {
575
+ "epoch": 35.6,
576
+ "learning_rate": 2.403333333333333e-05,
577
+ "loss": 0.9178,
578
+ "step": 8900
579
+ },
580
+ {
581
+ "epoch": 36.0,
582
+ "learning_rate": 2.3366666666666668e-05,
583
+ "loss": 0.9107,
584
+ "step": 9000
585
+ },
586
+ {
587
+ "epoch": 36.4,
588
+ "learning_rate": 2.27e-05,
589
+ "loss": 0.9152,
590
+ "step": 9100
591
+ },
592
+ {
593
+ "epoch": 36.8,
594
+ "learning_rate": 2.203333333333333e-05,
595
+ "loss": 0.9043,
596
+ "step": 9200
597
+ },
598
+ {
599
+ "epoch": 37.2,
600
+ "learning_rate": 2.1366666666666667e-05,
601
+ "loss": 0.905,
602
+ "step": 9300
603
+ },
604
+ {
605
+ "epoch": 37.6,
606
+ "learning_rate": 2.07e-05,
607
+ "loss": 0.9086,
608
+ "step": 9400
609
+ },
610
+ {
611
+ "epoch": 38.0,
612
+ "learning_rate": 2.0033333333333334e-05,
613
+ "loss": 0.9144,
614
+ "step": 9500
615
+ },
616
+ {
617
+ "epoch": 38.4,
618
+ "learning_rate": 1.9366666666666665e-05,
619
+ "loss": 0.9043,
620
+ "step": 9600
621
+ },
622
+ {
623
+ "epoch": 38.8,
624
+ "learning_rate": 1.8706666666666665e-05,
625
+ "loss": 0.895,
626
+ "step": 9700
627
+ },
628
+ {
629
+ "epoch": 39.2,
630
+ "learning_rate": 1.804e-05,
631
+ "loss": 0.9035,
632
+ "step": 9800
633
+ },
634
+ {
635
+ "epoch": 39.6,
636
+ "learning_rate": 1.7373333333333332e-05,
637
+ "loss": 0.8993,
638
+ "step": 9900
639
+ },
640
+ {
641
+ "epoch": 40.0,
642
+ "learning_rate": 1.6706666666666664e-05,
643
+ "loss": 0.887,
644
+ "step": 10000
645
+ },
646
+ {
647
+ "epoch": 40.0,
648
+ "eval_loss": 0.18192386627197266,
649
+ "eval_runtime": 161.0783,
650
+ "eval_samples_per_second": 21.3,
651
+ "eval_steps_per_second": 21.3,
652
+ "eval_wer": 0.26156182212581347,
653
+ "step": 10000
654
+ },
655
+ {
656
+ "epoch": 40.4,
657
+ "learning_rate": 1.604e-05,
658
+ "loss": 0.8917,
659
+ "step": 10100
660
+ },
661
+ {
662
+ "epoch": 40.8,
663
+ "learning_rate": 1.5373333333333334e-05,
664
+ "loss": 0.8863,
665
+ "step": 10200
666
+ },
667
+ {
668
+ "epoch": 41.2,
669
+ "learning_rate": 1.4706666666666664e-05,
670
+ "loss": 0.8793,
671
+ "step": 10300
672
+ },
673
+ {
674
+ "epoch": 41.6,
675
+ "learning_rate": 1.4039999999999998e-05,
676
+ "loss": 0.8818,
677
+ "step": 10400
678
+ },
679
+ {
680
+ "epoch": 42.0,
681
+ "learning_rate": 1.3373333333333333e-05,
682
+ "loss": 0.8873,
683
+ "step": 10500
684
+ },
685
+ {
686
+ "epoch": 42.4,
687
+ "learning_rate": 1.2706666666666666e-05,
688
+ "loss": 0.8873,
689
+ "step": 10600
690
+ },
691
+ {
692
+ "epoch": 42.8,
693
+ "learning_rate": 1.2039999999999998e-05,
694
+ "loss": 0.8683,
695
+ "step": 10700
696
+ },
697
+ {
698
+ "epoch": 43.2,
699
+ "learning_rate": 1.1373333333333332e-05,
700
+ "loss": 0.8815,
701
+ "step": 10800
702
+ },
703
+ {
704
+ "epoch": 43.6,
705
+ "learning_rate": 1.0706666666666665e-05,
706
+ "loss": 0.8715,
707
+ "step": 10900
708
+ },
709
+ {
710
+ "epoch": 44.0,
711
+ "learning_rate": 1.0039999999999999e-05,
712
+ "loss": 0.8732,
713
+ "step": 11000
714
+ },
715
+ {
716
+ "epoch": 44.4,
717
+ "learning_rate": 9.373333333333334e-06,
718
+ "loss": 0.8836,
719
+ "step": 11100
720
+ },
721
+ {
722
+ "epoch": 44.8,
723
+ "learning_rate": 8.706666666666666e-06,
724
+ "loss": 0.8609,
725
+ "step": 11200
726
+ },
727
+ {
728
+ "epoch": 45.2,
729
+ "learning_rate": 8.04e-06,
730
+ "loss": 0.882,
731
+ "step": 11300
732
+ },
733
+ {
734
+ "epoch": 45.6,
735
+ "learning_rate": 7.373333333333333e-06,
736
+ "loss": 0.8702,
737
+ "step": 11400
738
+ },
739
+ {
740
+ "epoch": 46.0,
741
+ "learning_rate": 6.706666666666665e-06,
742
+ "loss": 0.8673,
743
+ "step": 11500
744
+ },
745
+ {
746
+ "epoch": 46.4,
747
+ "learning_rate": 6.04e-06,
748
+ "loss": 0.8638,
749
+ "step": 11600
750
+ },
751
+ {
752
+ "epoch": 46.8,
753
+ "learning_rate": 5.373333333333333e-06,
754
+ "loss": 0.877,
755
+ "step": 11700
756
+ },
757
+ {
758
+ "epoch": 47.2,
759
+ "learning_rate": 4.706666666666666e-06,
760
+ "loss": 0.8605,
761
+ "step": 11800
762
+ },
763
+ {
764
+ "epoch": 47.6,
765
+ "learning_rate": 4.0399999999999994e-06,
766
+ "loss": 0.8472,
767
+ "step": 11900
768
+ },
769
+ {
770
+ "epoch": 48.0,
771
+ "learning_rate": 3.3733333333333334e-06,
772
+ "loss": 0.8563,
773
+ "step": 12000
774
+ },
775
+ {
776
+ "epoch": 48.0,
777
+ "eval_loss": 0.17289325594902039,
778
+ "eval_runtime": 164.3624,
779
+ "eval_samples_per_second": 20.875,
780
+ "eval_steps_per_second": 20.875,
781
+ "eval_wer": 0.24754880694143166,
782
+ "step": 12000
783
+ },
784
+ {
785
+ "epoch": 48.4,
786
+ "learning_rate": 2.7066666666666664e-06,
787
+ "loss": 0.8802,
788
+ "step": 12100
789
+ },
790
+ {
791
+ "epoch": 48.8,
792
+ "learning_rate": 2.04e-06,
793
+ "loss": 0.8534,
794
+ "step": 12200
795
+ },
796
+ {
797
+ "epoch": 49.2,
798
+ "learning_rate": 1.3733333333333332e-06,
799
+ "loss": 0.8543,
800
+ "step": 12300
801
+ },
802
+ {
803
+ "epoch": 49.6,
804
+ "learning_rate": 7.066666666666665e-07,
805
+ "loss": 0.8663,
806
+ "step": 12400
807
+ },
808
+ {
809
+ "epoch": 50.0,
810
+ "learning_rate": 4e-08,
811
+ "loss": 0.8528,
812
+ "step": 12500
813
+ },
814
+ {
815
+ "epoch": 50.0,
816
+ "step": 12500,
817
+ "total_flos": 4.84683909774298e+19,
818
+ "train_loss": 1.3891823223876953,
819
+ "train_runtime": 18203.5679,
820
+ "train_samples_per_second": 21.944,
821
+ "train_steps_per_second": 0.687
822
+ }
823
+ ],
824
+ "max_steps": 12500,
825
+ "num_train_epochs": 50,
826
+ "total_flos": 4.84683909774298e+19,
827
+ "trial_name": null,
828
+ "trial_params": null
829
+ }