matsuap commited on
Commit
85f3597
1 Parent(s): 52db47d

Upload 9 files

Browse files
config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-medium",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50257
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 1024,
17
+ "decoder_attention_heads": 16,
18
+ "decoder_ffn_dim": 4096,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 24,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 24,
27
+ "eos_token_id": 50257,
28
+ "forced_decoder_ids": [
29
+ [
30
+ 1,
31
+ 50266
32
+ ],
33
+ [
34
+ 2,
35
+ 50359
36
+ ],
37
+ [
38
+ 3,
39
+ 50363
40
+ ]
41
+ ],
42
+ "init_std": 0.02,
43
+ "is_encoder_decoder": true,
44
+ "mask_feature_length": 10,
45
+ "mask_feature_min_masks": 0,
46
+ "mask_feature_prob": 0.0,
47
+ "mask_time_length": 10,
48
+ "mask_time_min_masks": 2,
49
+ "mask_time_prob": 0.05,
50
+ "max_length": 448,
51
+ "max_source_positions": 1500,
52
+ "max_target_positions": 448,
53
+ "median_filter_width": 7,
54
+ "model_type": "whisper",
55
+ "num_hidden_layers": 24,
56
+ "num_mel_bins": 80,
57
+ "pad_token_id": 50257,
58
+ "scale_embedding": false,
59
+ "suppress_tokens": [],
60
+ "torch_dtype": "float32",
61
+ "transformers_version": "4.44.2",
62
+ "use_cache": true,
63
+ "use_weighted_layer_sum": false,
64
+ "vocab_size": 51865
65
+ }
generation_config.json ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "forced_decoder_ids": [
36
+ [
37
+ 1,
38
+ null
39
+ ],
40
+ [
41
+ 2,
42
+ 50359
43
+ ]
44
+ ],
45
+ "is_multilingual": true,
46
+ "lang_to_id": {
47
+ "<|af|>": 50327,
48
+ "<|am|>": 50334,
49
+ "<|ar|>": 50272,
50
+ "<|as|>": 50350,
51
+ "<|az|>": 50304,
52
+ "<|ba|>": 50355,
53
+ "<|be|>": 50330,
54
+ "<|bg|>": 50292,
55
+ "<|bn|>": 50302,
56
+ "<|bo|>": 50347,
57
+ "<|br|>": 50309,
58
+ "<|bs|>": 50315,
59
+ "<|ca|>": 50270,
60
+ "<|cs|>": 50283,
61
+ "<|cy|>": 50297,
62
+ "<|da|>": 50285,
63
+ "<|de|>": 50261,
64
+ "<|el|>": 50281,
65
+ "<|en|>": 50259,
66
+ "<|es|>": 50262,
67
+ "<|et|>": 50307,
68
+ "<|eu|>": 50310,
69
+ "<|fa|>": 50300,
70
+ "<|fi|>": 50277,
71
+ "<|fo|>": 50338,
72
+ "<|fr|>": 50265,
73
+ "<|gl|>": 50319,
74
+ "<|gu|>": 50333,
75
+ "<|haw|>": 50352,
76
+ "<|ha|>": 50354,
77
+ "<|he|>": 50279,
78
+ "<|hi|>": 50276,
79
+ "<|hr|>": 50291,
80
+ "<|ht|>": 50339,
81
+ "<|hu|>": 50286,
82
+ "<|hy|>": 50312,
83
+ "<|id|>": 50275,
84
+ "<|is|>": 50311,
85
+ "<|it|>": 50274,
86
+ "<|ja|>": 50266,
87
+ "<|jw|>": 50356,
88
+ "<|ka|>": 50329,
89
+ "<|kk|>": 50316,
90
+ "<|km|>": 50323,
91
+ "<|kn|>": 50306,
92
+ "<|ko|>": 50264,
93
+ "<|la|>": 50294,
94
+ "<|lb|>": 50345,
95
+ "<|ln|>": 50353,
96
+ "<|lo|>": 50336,
97
+ "<|lt|>": 50293,
98
+ "<|lv|>": 50301,
99
+ "<|mg|>": 50349,
100
+ "<|mi|>": 50295,
101
+ "<|mk|>": 50308,
102
+ "<|ml|>": 50296,
103
+ "<|mn|>": 50314,
104
+ "<|mr|>": 50320,
105
+ "<|ms|>": 50282,
106
+ "<|mt|>": 50343,
107
+ "<|my|>": 50346,
108
+ "<|ne|>": 50313,
109
+ "<|nl|>": 50271,
110
+ "<|nn|>": 50342,
111
+ "<|no|>": 50288,
112
+ "<|oc|>": 50328,
113
+ "<|pa|>": 50321,
114
+ "<|pl|>": 50269,
115
+ "<|ps|>": 50340,
116
+ "<|pt|>": 50267,
117
+ "<|ro|>": 50284,
118
+ "<|ru|>": 50263,
119
+ "<|sa|>": 50344,
120
+ "<|sd|>": 50332,
121
+ "<|si|>": 50322,
122
+ "<|sk|>": 50298,
123
+ "<|sl|>": 50305,
124
+ "<|sn|>": 50324,
125
+ "<|so|>": 50326,
126
+ "<|sq|>": 50317,
127
+ "<|sr|>": 50303,
128
+ "<|su|>": 50357,
129
+ "<|sv|>": 50273,
130
+ "<|sw|>": 50318,
131
+ "<|ta|>": 50287,
132
+ "<|te|>": 50299,
133
+ "<|tg|>": 50331,
134
+ "<|th|>": 50289,
135
+ "<|tk|>": 50341,
136
+ "<|tl|>": 50348,
137
+ "<|tr|>": 50268,
138
+ "<|tt|>": 50351,
139
+ "<|uk|>": 50280,
140
+ "<|ur|>": 50290,
141
+ "<|uz|>": 50337,
142
+ "<|vi|>": 50278,
143
+ "<|yi|>": 50335,
144
+ "<|yo|>": 50325,
145
+ "<|zh|>": 50260
146
+ },
147
+ "max_initial_timestamp_index": 50,
148
+ "max_length": 448,
149
+ "no_timestamps_token_id": 50363,
150
+ "pad_token_id": 50257,
151
+ "prev_sot_token_id": 50361,
152
+ "return_timestamps": false,
153
+ "suppress_tokens": [
154
+ 1,
155
+ 2,
156
+ 7,
157
+ 8,
158
+ 9,
159
+ 10,
160
+ 14,
161
+ 25,
162
+ 26,
163
+ 27,
164
+ 28,
165
+ 29,
166
+ 31,
167
+ 58,
168
+ 59,
169
+ 60,
170
+ 61,
171
+ 62,
172
+ 63,
173
+ 90,
174
+ 91,
175
+ 92,
176
+ 93,
177
+ 359,
178
+ 503,
179
+ 522,
180
+ 542,
181
+ 873,
182
+ 893,
183
+ 902,
184
+ 918,
185
+ 922,
186
+ 931,
187
+ 1350,
188
+ 1853,
189
+ 1982,
190
+ 2460,
191
+ 2627,
192
+ 3246,
193
+ 3253,
194
+ 3268,
195
+ 3536,
196
+ 3846,
197
+ 3961,
198
+ 4183,
199
+ 4667,
200
+ 6585,
201
+ 6647,
202
+ 7273,
203
+ 9061,
204
+ 9383,
205
+ 10428,
206
+ 10929,
207
+ 11938,
208
+ 12033,
209
+ 12331,
210
+ 12562,
211
+ 13793,
212
+ 14157,
213
+ 14635,
214
+ 15265,
215
+ 15618,
216
+ 16553,
217
+ 16604,
218
+ 18362,
219
+ 18956,
220
+ 20075,
221
+ 21675,
222
+ 22520,
223
+ 26130,
224
+ 26161,
225
+ 26435,
226
+ 28279,
227
+ 29464,
228
+ 31650,
229
+ 32302,
230
+ 32470,
231
+ 36865,
232
+ 42863,
233
+ 47425,
234
+ 49870,
235
+ 50254,
236
+ 50258,
237
+ 50358,
238
+ 50359,
239
+ 50360,
240
+ 50361,
241
+ 50362
242
+ ],
243
+ "task_to_id": {
244
+ "transcribe": 50359,
245
+ "translate": 50358
246
+ },
247
+ "transformers_version": "4.44.2"
248
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a92adc0f6e311f1e32a452662339c3ddfa8345e3acdd860f6e9d863c26ec5ef3
3
+ size 3055544304
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa37f6a7556126f2d9076949f15df386c4f2a64376023cfa947a98583c2c1579
3
+ size 3653619038
preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3d11e72c6be81e08d080a52df4f0e16adb27dedf145a913680acd9531ef65a4
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9247523a2a09e2300eda233d249b37d104af98f3bfd334c48fdfa496dd543bfb
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,1742 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3294712007045746,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/Development/whisperfinetune/whisper-medium-20241208122604/20241208122604/checkpoint-100",
4
+ "epoch": 100.0,
5
+ "eval_steps": 1,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.1317138671875,
14
+ "learning_rate": 1e-05,
15
+ "loss": 0.0573,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_loss": 2.347386121749878,
21
+ "eval_model_preparation_time": 0.0121,
22
+ "eval_runtime": 13.1236,
23
+ "eval_samples_per_second": 1.524,
24
+ "eval_steps_per_second": 0.229,
25
+ "eval_wer": 11.728395061728394,
26
+ "step": 1
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "grad_norm": 0.5305148959159851,
31
+ "learning_rate": 1e-05,
32
+ "loss": 0.0595,
33
+ "step": 2
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_loss": 2.04854416847229,
38
+ "eval_model_preparation_time": 0.0121,
39
+ "eval_runtime": 13.3393,
40
+ "eval_samples_per_second": 1.499,
41
+ "eval_steps_per_second": 0.225,
42
+ "eval_wer": 8.641975308641975,
43
+ "step": 2
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "grad_norm": 0.47034594416618347,
48
+ "learning_rate": 1e-05,
49
+ "loss": 0.0515,
50
+ "step": 3
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_loss": 1.7598810195922852,
55
+ "eval_model_preparation_time": 0.0121,
56
+ "eval_runtime": 13.2129,
57
+ "eval_samples_per_second": 1.514,
58
+ "eval_steps_per_second": 0.227,
59
+ "eval_wer": 5.555555555555555,
60
+ "step": 3
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "grad_norm": 0.8446077108383179,
65
+ "learning_rate": 1e-05,
66
+ "loss": 0.044,
67
+ "step": 4
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "eval_loss": 1.1538535356521606,
72
+ "eval_model_preparation_time": 0.0121,
73
+ "eval_runtime": 13.3081,
74
+ "eval_samples_per_second": 1.503,
75
+ "eval_steps_per_second": 0.225,
76
+ "eval_wer": 5.246913580246913,
77
+ "step": 4
78
+ },
79
+ {
80
+ "epoch": 5.0,
81
+ "grad_norm": 0.1741047352552414,
82
+ "learning_rate": 1e-05,
83
+ "loss": 0.0282,
84
+ "step": 5
85
+ },
86
+ {
87
+ "epoch": 5.0,
88
+ "eval_loss": 1.1221383810043335,
89
+ "eval_model_preparation_time": 0.0121,
90
+ "eval_runtime": 13.1632,
91
+ "eval_samples_per_second": 1.519,
92
+ "eval_steps_per_second": 0.228,
93
+ "eval_wer": 4.938271604938271,
94
+ "step": 5
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "grad_norm": 0.16934086382389069,
99
+ "learning_rate": 1e-05,
100
+ "loss": 0.0276,
101
+ "step": 6
102
+ },
103
+ {
104
+ "epoch": 6.0,
105
+ "eval_loss": 1.0995134115219116,
106
+ "eval_model_preparation_time": 0.0121,
107
+ "eval_runtime": 13.2456,
108
+ "eval_samples_per_second": 1.51,
109
+ "eval_steps_per_second": 0.226,
110
+ "eval_wer": 4.320987654320987,
111
+ "step": 6
112
+ },
113
+ {
114
+ "epoch": 7.0,
115
+ "grad_norm": 0.166069895029068,
116
+ "learning_rate": 1e-05,
117
+ "loss": 0.0271,
118
+ "step": 7
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "eval_loss": 1.0805295705795288,
123
+ "eval_model_preparation_time": 0.0121,
124
+ "eval_runtime": 13.0555,
125
+ "eval_samples_per_second": 1.532,
126
+ "eval_steps_per_second": 0.23,
127
+ "eval_wer": 4.320987654320987,
128
+ "step": 7
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "grad_norm": 0.1637350171804428,
133
+ "learning_rate": 1e-05,
134
+ "loss": 0.0266,
135
+ "step": 8
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_loss": 1.0605648756027222,
140
+ "eval_model_preparation_time": 0.0121,
141
+ "eval_runtime": 13.2511,
142
+ "eval_samples_per_second": 1.509,
143
+ "eval_steps_per_second": 0.226,
144
+ "eval_wer": 4.320987654320987,
145
+ "step": 8
146
+ },
147
+ {
148
+ "epoch": 9.0,
149
+ "grad_norm": 0.16157877445220947,
150
+ "learning_rate": 1e-05,
151
+ "loss": 0.026,
152
+ "step": 9
153
+ },
154
+ {
155
+ "epoch": 9.0,
156
+ "eval_loss": 1.0405693054199219,
157
+ "eval_model_preparation_time": 0.0121,
158
+ "eval_runtime": 13.1332,
159
+ "eval_samples_per_second": 1.523,
160
+ "eval_steps_per_second": 0.228,
161
+ "eval_wer": 4.62962962962963,
162
+ "step": 9
163
+ },
164
+ {
165
+ "epoch": 10.0,
166
+ "grad_norm": 0.1597001850605011,
167
+ "learning_rate": 1e-05,
168
+ "loss": 0.0255,
169
+ "step": 10
170
+ },
171
+ {
172
+ "epoch": 10.0,
173
+ "eval_loss": 1.021639347076416,
174
+ "eval_model_preparation_time": 0.0121,
175
+ "eval_runtime": 13.0982,
176
+ "eval_samples_per_second": 1.527,
177
+ "eval_steps_per_second": 0.229,
178
+ "eval_wer": 4.62962962962963,
179
+ "step": 10
180
+ },
181
+ {
182
+ "epoch": 11.0,
183
+ "grad_norm": 0.15809614956378937,
184
+ "learning_rate": 1e-05,
185
+ "loss": 0.025,
186
+ "step": 11
187
+ },
188
+ {
189
+ "epoch": 11.0,
190
+ "eval_loss": 1.0027821063995361,
191
+ "eval_model_preparation_time": 0.0121,
192
+ "eval_runtime": 13.2797,
193
+ "eval_samples_per_second": 1.506,
194
+ "eval_steps_per_second": 0.226,
195
+ "eval_wer": 4.62962962962963,
196
+ "step": 11
197
+ },
198
+ {
199
+ "epoch": 12.0,
200
+ "grad_norm": 0.15653067827224731,
201
+ "learning_rate": 1e-05,
202
+ "loss": 0.0244,
203
+ "step": 12
204
+ },
205
+ {
206
+ "epoch": 12.0,
207
+ "eval_loss": 0.9836124777793884,
208
+ "eval_model_preparation_time": 0.0121,
209
+ "eval_runtime": 18.954,
210
+ "eval_samples_per_second": 1.055,
211
+ "eval_steps_per_second": 0.158,
212
+ "eval_wer": 32.407407407407405,
213
+ "step": 12
214
+ },
215
+ {
216
+ "epoch": 13.0,
217
+ "grad_norm": 0.15485768020153046,
218
+ "learning_rate": 1e-05,
219
+ "loss": 0.0239,
220
+ "step": 13
221
+ },
222
+ {
223
+ "epoch": 13.0,
224
+ "eval_loss": 0.9660758376121521,
225
+ "eval_model_preparation_time": 0.0121,
226
+ "eval_runtime": 19.023,
227
+ "eval_samples_per_second": 1.051,
228
+ "eval_steps_per_second": 0.158,
229
+ "eval_wer": 32.407407407407405,
230
+ "step": 13
231
+ },
232
+ {
233
+ "epoch": 14.0,
234
+ "grad_norm": 0.1532512605190277,
235
+ "learning_rate": 1e-05,
236
+ "loss": 0.0234,
237
+ "step": 14
238
+ },
239
+ {
240
+ "epoch": 14.0,
241
+ "eval_loss": 0.9473101496696472,
242
+ "eval_model_preparation_time": 0.0121,
243
+ "eval_runtime": 19.1218,
244
+ "eval_samples_per_second": 1.046,
245
+ "eval_steps_per_second": 0.157,
246
+ "eval_wer": 32.407407407407405,
247
+ "step": 14
248
+ },
249
+ {
250
+ "epoch": 15.0,
251
+ "grad_norm": 0.15129534900188446,
252
+ "learning_rate": 1e-05,
253
+ "loss": 0.0229,
254
+ "step": 15
255
+ },
256
+ {
257
+ "epoch": 15.0,
258
+ "eval_loss": 0.9287378191947937,
259
+ "eval_model_preparation_time": 0.0121,
260
+ "eval_runtime": 18.9602,
261
+ "eval_samples_per_second": 1.055,
262
+ "eval_steps_per_second": 0.158,
263
+ "eval_wer": 32.407407407407405,
264
+ "step": 15
265
+ },
266
+ {
267
+ "epoch": 16.0,
268
+ "grad_norm": 0.1490040421485901,
269
+ "learning_rate": 1e-05,
270
+ "loss": 0.0224,
271
+ "step": 16
272
+ },
273
+ {
274
+ "epoch": 16.0,
275
+ "eval_loss": 0.9106081128120422,
276
+ "eval_model_preparation_time": 0.0121,
277
+ "eval_runtime": 19.0132,
278
+ "eval_samples_per_second": 1.052,
279
+ "eval_steps_per_second": 0.158,
280
+ "eval_wer": 30.555555555555557,
281
+ "step": 16
282
+ },
283
+ {
284
+ "epoch": 17.0,
285
+ "grad_norm": 0.14625835418701172,
286
+ "learning_rate": 1e-05,
287
+ "loss": 0.0218,
288
+ "step": 17
289
+ },
290
+ {
291
+ "epoch": 17.0,
292
+ "eval_loss": 0.8923694491386414,
293
+ "eval_model_preparation_time": 0.0121,
294
+ "eval_runtime": 19.1496,
295
+ "eval_samples_per_second": 1.044,
296
+ "eval_steps_per_second": 0.157,
297
+ "eval_wer": 32.098765432098766,
298
+ "step": 17
299
+ },
300
+ {
301
+ "epoch": 18.0,
302
+ "grad_norm": 0.1428905576467514,
303
+ "learning_rate": 1e-05,
304
+ "loss": 0.0213,
305
+ "step": 18
306
+ },
307
+ {
308
+ "epoch": 18.0,
309
+ "eval_loss": 0.8747221827507019,
310
+ "eval_model_preparation_time": 0.0121,
311
+ "eval_runtime": 18.9828,
312
+ "eval_samples_per_second": 1.054,
313
+ "eval_steps_per_second": 0.158,
314
+ "eval_wer": 30.555555555555557,
315
+ "step": 18
316
+ },
317
+ {
318
+ "epoch": 19.0,
319
+ "grad_norm": 0.13874377310276031,
320
+ "learning_rate": 1e-05,
321
+ "loss": 0.0209,
322
+ "step": 19
323
+ },
324
+ {
325
+ "epoch": 19.0,
326
+ "eval_loss": 0.8583929538726807,
327
+ "eval_model_preparation_time": 0.0121,
328
+ "eval_runtime": 13.0296,
329
+ "eval_samples_per_second": 1.535,
330
+ "eval_steps_per_second": 0.23,
331
+ "eval_wer": 4.320987654320987,
332
+ "step": 19
333
+ },
334
+ {
335
+ "epoch": 20.0,
336
+ "grad_norm": 0.13400524854660034,
337
+ "learning_rate": 1e-05,
338
+ "loss": 0.0204,
339
+ "step": 20
340
+ },
341
+ {
342
+ "epoch": 20.0,
343
+ "eval_loss": 0.842006504535675,
344
+ "eval_model_preparation_time": 0.0121,
345
+ "eval_runtime": 13.0362,
346
+ "eval_samples_per_second": 1.534,
347
+ "eval_steps_per_second": 0.23,
348
+ "eval_wer": 4.320987654320987,
349
+ "step": 20
350
+ },
351
+ {
352
+ "epoch": 21.0,
353
+ "grad_norm": 0.128588005900383,
354
+ "learning_rate": 1e-05,
355
+ "loss": 0.02,
356
+ "step": 21
357
+ },
358
+ {
359
+ "epoch": 21.0,
360
+ "eval_loss": 0.8266332745552063,
361
+ "eval_model_preparation_time": 0.0121,
362
+ "eval_runtime": 13.1349,
363
+ "eval_samples_per_second": 1.523,
364
+ "eval_steps_per_second": 0.228,
365
+ "eval_wer": 4.320987654320987,
366
+ "step": 21
367
+ },
368
+ {
369
+ "epoch": 22.0,
370
+ "grad_norm": 0.12290169298648834,
371
+ "learning_rate": 1e-05,
372
+ "loss": 0.0196,
373
+ "step": 22
374
+ },
375
+ {
376
+ "epoch": 22.0,
377
+ "eval_loss": 0.812411367893219,
378
+ "eval_model_preparation_time": 0.0121,
379
+ "eval_runtime": 13.0331,
380
+ "eval_samples_per_second": 1.535,
381
+ "eval_steps_per_second": 0.23,
382
+ "eval_wer": 4.320987654320987,
383
+ "step": 22
384
+ },
385
+ {
386
+ "epoch": 23.0,
387
+ "grad_norm": 0.11754843592643738,
388
+ "learning_rate": 1e-05,
389
+ "loss": 0.0193,
390
+ "step": 23
391
+ },
392
+ {
393
+ "epoch": 23.0,
394
+ "eval_loss": 0.7992361187934875,
395
+ "eval_model_preparation_time": 0.0121,
396
+ "eval_runtime": 13.0451,
397
+ "eval_samples_per_second": 1.533,
398
+ "eval_steps_per_second": 0.23,
399
+ "eval_wer": 4.320987654320987,
400
+ "step": 23
401
+ },
402
+ {
403
+ "epoch": 24.0,
404
+ "grad_norm": 0.11315960437059402,
405
+ "learning_rate": 1e-05,
406
+ "loss": 0.019,
407
+ "step": 24
408
+ },
409
+ {
410
+ "epoch": 24.0,
411
+ "eval_loss": 0.7874162197113037,
412
+ "eval_model_preparation_time": 0.0121,
413
+ "eval_runtime": 13.0108,
414
+ "eval_samples_per_second": 1.537,
415
+ "eval_steps_per_second": 0.231,
416
+ "eval_wer": 4.320987654320987,
417
+ "step": 24
418
+ },
419
+ {
420
+ "epoch": 25.0,
421
+ "grad_norm": 0.10973851382732391,
422
+ "learning_rate": 1e-05,
423
+ "loss": 0.0187,
424
+ "step": 25
425
+ },
426
+ {
427
+ "epoch": 25.0,
428
+ "eval_loss": 0.7770275473594666,
429
+ "eval_model_preparation_time": 0.0121,
430
+ "eval_runtime": 13.0113,
431
+ "eval_samples_per_second": 1.537,
432
+ "eval_steps_per_second": 0.231,
433
+ "eval_wer": 4.320987654320987,
434
+ "step": 25
435
+ },
436
+ {
437
+ "epoch": 26.0,
438
+ "grad_norm": 0.10730645060539246,
439
+ "learning_rate": 1e-05,
440
+ "loss": 0.0185,
441
+ "step": 26
442
+ },
443
+ {
444
+ "epoch": 26.0,
445
+ "eval_loss": 0.7670324444770813,
446
+ "eval_model_preparation_time": 0.0121,
447
+ "eval_runtime": 13.0264,
448
+ "eval_samples_per_second": 1.535,
449
+ "eval_steps_per_second": 0.23,
450
+ "eval_wer": 4.320987654320987,
451
+ "step": 26
452
+ },
453
+ {
454
+ "epoch": 27.0,
455
+ "grad_norm": 0.10558220744132996,
456
+ "learning_rate": 1e-05,
457
+ "loss": 0.0183,
458
+ "step": 27
459
+ },
460
+ {
461
+ "epoch": 27.0,
462
+ "eval_loss": 0.7583534121513367,
463
+ "eval_model_preparation_time": 0.0121,
464
+ "eval_runtime": 13.0554,
465
+ "eval_samples_per_second": 1.532,
466
+ "eval_steps_per_second": 0.23,
467
+ "eval_wer": 4.320987654320987,
468
+ "step": 27
469
+ },
470
+ {
471
+ "epoch": 28.0,
472
+ "grad_norm": 0.10448750853538513,
473
+ "learning_rate": 1e-05,
474
+ "loss": 0.0181,
475
+ "step": 28
476
+ },
477
+ {
478
+ "epoch": 28.0,
479
+ "eval_loss": 0.7502322793006897,
480
+ "eval_model_preparation_time": 0.0121,
481
+ "eval_runtime": 13.0142,
482
+ "eval_samples_per_second": 1.537,
483
+ "eval_steps_per_second": 0.231,
484
+ "eval_wer": 4.320987654320987,
485
+ "step": 28
486
+ },
487
+ {
488
+ "epoch": 29.0,
489
+ "grad_norm": 0.10369043797254562,
490
+ "learning_rate": 1e-05,
491
+ "loss": 0.0179,
492
+ "step": 29
493
+ },
494
+ {
495
+ "epoch": 29.0,
496
+ "eval_loss": 0.7425273060798645,
497
+ "eval_model_preparation_time": 0.0121,
498
+ "eval_runtime": 13.024,
499
+ "eval_samples_per_second": 1.536,
500
+ "eval_steps_per_second": 0.23,
501
+ "eval_wer": 4.012345679012346,
502
+ "step": 29
503
+ },
504
+ {
505
+ "epoch": 30.0,
506
+ "grad_norm": 0.10316835343837738,
507
+ "learning_rate": 1e-05,
508
+ "loss": 0.0177,
509
+ "step": 30
510
+ },
511
+ {
512
+ "epoch": 30.0,
513
+ "eval_loss": 0.7353512644767761,
514
+ "eval_model_preparation_time": 0.0121,
515
+ "eval_runtime": 13.0504,
516
+ "eval_samples_per_second": 1.533,
517
+ "eval_steps_per_second": 0.23,
518
+ "eval_wer": 4.012345679012346,
519
+ "step": 30
520
+ },
521
+ {
522
+ "epoch": 31.0,
523
+ "grad_norm": 0.10271348059177399,
524
+ "learning_rate": 1e-05,
525
+ "loss": 0.0175,
526
+ "step": 31
527
+ },
528
+ {
529
+ "epoch": 31.0,
530
+ "eval_loss": 0.7285407185554504,
531
+ "eval_model_preparation_time": 0.0121,
532
+ "eval_runtime": 13.0146,
533
+ "eval_samples_per_second": 1.537,
534
+ "eval_steps_per_second": 0.231,
535
+ "eval_wer": 4.012345679012346,
536
+ "step": 31
537
+ },
538
+ {
539
+ "epoch": 32.0,
540
+ "grad_norm": 0.10236351937055588,
541
+ "learning_rate": 1e-05,
542
+ "loss": 0.0174,
543
+ "step": 32
544
+ },
545
+ {
546
+ "epoch": 32.0,
547
+ "eval_loss": 0.7218928337097168,
548
+ "eval_model_preparation_time": 0.0121,
549
+ "eval_runtime": 12.9915,
550
+ "eval_samples_per_second": 1.539,
551
+ "eval_steps_per_second": 0.231,
552
+ "eval_wer": 4.012345679012346,
553
+ "step": 32
554
+ },
555
+ {
556
+ "epoch": 33.0,
557
+ "grad_norm": 0.1020871251821518,
558
+ "learning_rate": 1e-05,
559
+ "loss": 0.0172,
560
+ "step": 33
561
+ },
562
+ {
563
+ "epoch": 33.0,
564
+ "eval_loss": 0.7155935764312744,
565
+ "eval_model_preparation_time": 0.0121,
566
+ "eval_runtime": 13.03,
567
+ "eval_samples_per_second": 1.535,
568
+ "eval_steps_per_second": 0.23,
569
+ "eval_wer": 4.012345679012346,
570
+ "step": 33
571
+ },
572
+ {
573
+ "epoch": 34.0,
574
+ "grad_norm": 0.10186725854873657,
575
+ "learning_rate": 1e-05,
576
+ "loss": 0.0171,
577
+ "step": 34
578
+ },
579
+ {
580
+ "epoch": 34.0,
581
+ "eval_loss": 0.7092947959899902,
582
+ "eval_model_preparation_time": 0.0121,
583
+ "eval_runtime": 13.0366,
584
+ "eval_samples_per_second": 1.534,
585
+ "eval_steps_per_second": 0.23,
586
+ "eval_wer": 4.012345679012346,
587
+ "step": 34
588
+ },
589
+ {
590
+ "epoch": 35.0,
591
+ "grad_norm": 0.10165821760892868,
592
+ "learning_rate": 1e-05,
593
+ "loss": 0.0169,
594
+ "step": 35
595
+ },
596
+ {
597
+ "epoch": 35.0,
598
+ "eval_loss": 0.703009307384491,
599
+ "eval_model_preparation_time": 0.0121,
600
+ "eval_runtime": 13.0457,
601
+ "eval_samples_per_second": 1.533,
602
+ "eval_steps_per_second": 0.23,
603
+ "eval_wer": 4.012345679012346,
604
+ "step": 35
605
+ },
606
+ {
607
+ "epoch": 36.0,
608
+ "grad_norm": 0.10144059360027313,
609
+ "learning_rate": 1e-05,
610
+ "loss": 0.0167,
611
+ "step": 36
612
+ },
613
+ {
614
+ "epoch": 36.0,
615
+ "eval_loss": 0.6968026161193848,
616
+ "eval_model_preparation_time": 0.0121,
617
+ "eval_runtime": 13.0191,
618
+ "eval_samples_per_second": 1.536,
619
+ "eval_steps_per_second": 0.23,
620
+ "eval_wer": 4.012345679012346,
621
+ "step": 36
622
+ },
623
+ {
624
+ "epoch": 37.0,
625
+ "grad_norm": 0.1012805923819542,
626
+ "learning_rate": 1e-05,
627
+ "loss": 0.0166,
628
+ "step": 37
629
+ },
630
+ {
631
+ "epoch": 37.0,
632
+ "eval_loss": 0.6906681060791016,
633
+ "eval_model_preparation_time": 0.0121,
634
+ "eval_runtime": 13.0242,
635
+ "eval_samples_per_second": 1.536,
636
+ "eval_steps_per_second": 0.23,
637
+ "eval_wer": 4.012345679012346,
638
+ "step": 37
639
+ },
640
+ {
641
+ "epoch": 38.0,
642
+ "grad_norm": 0.10113769769668579,
643
+ "learning_rate": 1e-05,
644
+ "loss": 0.0164,
645
+ "step": 38
646
+ },
647
+ {
648
+ "epoch": 38.0,
649
+ "eval_loss": 0.6843755841255188,
650
+ "eval_model_preparation_time": 0.0121,
651
+ "eval_runtime": 13.0667,
652
+ "eval_samples_per_second": 1.531,
653
+ "eval_steps_per_second": 0.23,
654
+ "eval_wer": 4.012345679012346,
655
+ "step": 38
656
+ },
657
+ {
658
+ "epoch": 39.0,
659
+ "grad_norm": 0.10098178684711456,
660
+ "learning_rate": 1e-05,
661
+ "loss": 0.0162,
662
+ "step": 39
663
+ },
664
+ {
665
+ "epoch": 39.0,
666
+ "eval_loss": 0.6783390045166016,
667
+ "eval_model_preparation_time": 0.0121,
668
+ "eval_runtime": 13.0465,
669
+ "eval_samples_per_second": 1.533,
670
+ "eval_steps_per_second": 0.23,
671
+ "eval_wer": 4.012345679012346,
672
+ "step": 39
673
+ },
674
+ {
675
+ "epoch": 40.0,
676
+ "grad_norm": 0.1008835956454277,
677
+ "learning_rate": 1e-05,
678
+ "loss": 0.0161,
679
+ "step": 40
680
+ },
681
+ {
682
+ "epoch": 40.0,
683
+ "eval_loss": 0.6723348498344421,
684
+ "eval_model_preparation_time": 0.0121,
685
+ "eval_runtime": 13.0472,
686
+ "eval_samples_per_second": 1.533,
687
+ "eval_steps_per_second": 0.23,
688
+ "eval_wer": 4.012345679012346,
689
+ "step": 40
690
+ },
691
+ {
692
+ "epoch": 41.0,
693
+ "grad_norm": 0.10077005624771118,
694
+ "learning_rate": 1e-05,
695
+ "loss": 0.0159,
696
+ "step": 41
697
+ },
698
+ {
699
+ "epoch": 41.0,
700
+ "eval_loss": 0.6662711501121521,
701
+ "eval_model_preparation_time": 0.0121,
702
+ "eval_runtime": 13.0732,
703
+ "eval_samples_per_second": 1.53,
704
+ "eval_steps_per_second": 0.229,
705
+ "eval_wer": 4.012345679012346,
706
+ "step": 41
707
+ },
708
+ {
709
+ "epoch": 42.0,
710
+ "grad_norm": 0.1006632074713707,
711
+ "learning_rate": 1e-05,
712
+ "loss": 0.0158,
713
+ "step": 42
714
+ },
715
+ {
716
+ "epoch": 42.0,
717
+ "eval_loss": 0.6601687073707581,
718
+ "eval_model_preparation_time": 0.0121,
719
+ "eval_runtime": 13.104,
720
+ "eval_samples_per_second": 1.526,
721
+ "eval_steps_per_second": 0.229,
722
+ "eval_wer": 4.012345679012346,
723
+ "step": 42
724
+ },
725
+ {
726
+ "epoch": 43.0,
727
+ "grad_norm": 0.10058455169200897,
728
+ "learning_rate": 1e-05,
729
+ "loss": 0.0156,
730
+ "step": 43
731
+ },
732
+ {
733
+ "epoch": 43.0,
734
+ "eval_loss": 0.6541077494621277,
735
+ "eval_model_preparation_time": 0.0121,
736
+ "eval_runtime": 13.0457,
737
+ "eval_samples_per_second": 1.533,
738
+ "eval_steps_per_second": 0.23,
739
+ "eval_wer": 4.012345679012346,
740
+ "step": 43
741
+ },
742
+ {
743
+ "epoch": 44.0,
744
+ "grad_norm": 0.10048093646764755,
745
+ "learning_rate": 1e-05,
746
+ "loss": 0.0154,
747
+ "step": 44
748
+ },
749
+ {
750
+ "epoch": 44.0,
751
+ "eval_loss": 0.6480630040168762,
752
+ "eval_model_preparation_time": 0.0121,
753
+ "eval_runtime": 13.0213,
754
+ "eval_samples_per_second": 1.536,
755
+ "eval_steps_per_second": 0.23,
756
+ "eval_wer": 4.012345679012346,
757
+ "step": 44
758
+ },
759
+ {
760
+ "epoch": 45.0,
761
+ "grad_norm": 0.10042457282543182,
762
+ "learning_rate": 1e-05,
763
+ "loss": 0.0153,
764
+ "step": 45
765
+ },
766
+ {
767
+ "epoch": 45.0,
768
+ "eval_loss": 0.6420673131942749,
769
+ "eval_model_preparation_time": 0.0121,
770
+ "eval_runtime": 12.994,
771
+ "eval_samples_per_second": 1.539,
772
+ "eval_steps_per_second": 0.231,
773
+ "eval_wer": 4.012345679012346,
774
+ "step": 45
775
+ },
776
+ {
777
+ "epoch": 46.0,
778
+ "grad_norm": 0.1003439873456955,
779
+ "learning_rate": 1e-05,
780
+ "loss": 0.0151,
781
+ "step": 46
782
+ },
783
+ {
784
+ "epoch": 46.0,
785
+ "eval_loss": 0.6359073519706726,
786
+ "eval_model_preparation_time": 0.0121,
787
+ "eval_runtime": 13.0936,
788
+ "eval_samples_per_second": 1.527,
789
+ "eval_steps_per_second": 0.229,
790
+ "eval_wer": 4.012345679012346,
791
+ "step": 46
792
+ },
793
+ {
794
+ "epoch": 47.0,
795
+ "grad_norm": 0.10026626288890839,
796
+ "learning_rate": 1e-05,
797
+ "loss": 0.0149,
798
+ "step": 47
799
+ },
800
+ {
801
+ "epoch": 47.0,
802
+ "eval_loss": 0.6298264861106873,
803
+ "eval_model_preparation_time": 0.0121,
804
+ "eval_runtime": 13.0519,
805
+ "eval_samples_per_second": 1.532,
806
+ "eval_steps_per_second": 0.23,
807
+ "eval_wer": 4.012345679012346,
808
+ "step": 47
809
+ },
810
+ {
811
+ "epoch": 48.0,
812
+ "grad_norm": 0.10018378496170044,
813
+ "learning_rate": 1e-05,
814
+ "loss": 0.0148,
815
+ "step": 48
816
+ },
817
+ {
818
+ "epoch": 48.0,
819
+ "eval_loss": 0.6237847208976746,
820
+ "eval_model_preparation_time": 0.0121,
821
+ "eval_runtime": 13.1033,
822
+ "eval_samples_per_second": 1.526,
823
+ "eval_steps_per_second": 0.229,
824
+ "eval_wer": 4.012345679012346,
825
+ "step": 48
826
+ },
827
+ {
828
+ "epoch": 49.0,
829
+ "grad_norm": 0.10017245262861252,
830
+ "learning_rate": 1e-05,
831
+ "loss": 0.0146,
832
+ "step": 49
833
+ },
834
+ {
835
+ "epoch": 49.0,
836
+ "eval_loss": 0.617721438407898,
837
+ "eval_model_preparation_time": 0.0121,
838
+ "eval_runtime": 13.0808,
839
+ "eval_samples_per_second": 1.529,
840
+ "eval_steps_per_second": 0.229,
841
+ "eval_wer": 4.012345679012346,
842
+ "step": 49
843
+ },
844
+ {
845
+ "epoch": 50.0,
846
+ "grad_norm": 0.10012275725603104,
847
+ "learning_rate": 1e-05,
848
+ "loss": 0.0144,
849
+ "step": 50
850
+ },
851
+ {
852
+ "epoch": 50.0,
853
+ "eval_loss": 0.6115495562553406,
854
+ "eval_model_preparation_time": 0.0121,
855
+ "eval_runtime": 13.0424,
856
+ "eval_samples_per_second": 1.533,
857
+ "eval_steps_per_second": 0.23,
858
+ "eval_wer": 4.012345679012346,
859
+ "step": 50
860
+ },
861
+ {
862
+ "epoch": 51.0,
863
+ "grad_norm": 0.10007242113351822,
864
+ "learning_rate": 1e-05,
865
+ "loss": 0.0142,
866
+ "step": 51
867
+ },
868
+ {
869
+ "epoch": 51.0,
870
+ "eval_loss": 0.6055248379707336,
871
+ "eval_model_preparation_time": 0.0121,
872
+ "eval_runtime": 13.1155,
873
+ "eval_samples_per_second": 1.525,
874
+ "eval_steps_per_second": 0.229,
875
+ "eval_wer": 4.012345679012346,
876
+ "step": 51
877
+ },
878
+ {
879
+ "epoch": 52.0,
880
+ "grad_norm": 0.1000462993979454,
881
+ "learning_rate": 1e-05,
882
+ "loss": 0.0141,
883
+ "step": 52
884
+ },
885
+ {
886
+ "epoch": 52.0,
887
+ "eval_loss": 0.5995514988899231,
888
+ "eval_model_preparation_time": 0.0121,
889
+ "eval_runtime": 13.0374,
890
+ "eval_samples_per_second": 1.534,
891
+ "eval_steps_per_second": 0.23,
892
+ "eval_wer": 4.012345679012346,
893
+ "step": 52
894
+ },
895
+ {
896
+ "epoch": 53.0,
897
+ "grad_norm": 0.09999983012676239,
898
+ "learning_rate": 1e-05,
899
+ "loss": 0.0139,
900
+ "step": 53
901
+ },
902
+ {
903
+ "epoch": 53.0,
904
+ "eval_loss": 0.5934532284736633,
905
+ "eval_model_preparation_time": 0.0121,
906
+ "eval_runtime": 13.0708,
907
+ "eval_samples_per_second": 1.53,
908
+ "eval_steps_per_second": 0.23,
909
+ "eval_wer": 4.012345679012346,
910
+ "step": 53
911
+ },
912
+ {
913
+ "epoch": 54.0,
914
+ "grad_norm": 0.10001446306705475,
915
+ "learning_rate": 1e-05,
916
+ "loss": 0.0137,
917
+ "step": 54
918
+ },
919
+ {
920
+ "epoch": 54.0,
921
+ "eval_loss": 0.5873024463653564,
922
+ "eval_model_preparation_time": 0.0121,
923
+ "eval_runtime": 13.0551,
924
+ "eval_samples_per_second": 1.532,
925
+ "eval_steps_per_second": 0.23,
926
+ "eval_wer": 4.012345679012346,
927
+ "step": 54
928
+ },
929
+ {
930
+ "epoch": 55.0,
931
+ "grad_norm": 0.09999886900186539,
932
+ "learning_rate": 1e-05,
933
+ "loss": 0.0136,
934
+ "step": 55
935
+ },
936
+ {
937
+ "epoch": 55.0,
938
+ "eval_loss": 0.5811672210693359,
939
+ "eval_model_preparation_time": 0.0121,
940
+ "eval_runtime": 13.0575,
941
+ "eval_samples_per_second": 1.532,
942
+ "eval_steps_per_second": 0.23,
943
+ "eval_wer": 4.012345679012346,
944
+ "step": 55
945
+ },
946
+ {
947
+ "epoch": 56.0,
948
+ "grad_norm": 0.09996996074914932,
949
+ "learning_rate": 1e-05,
950
+ "loss": 0.0134,
951
+ "step": 56
952
+ },
953
+ {
954
+ "epoch": 56.0,
955
+ "eval_loss": 0.5749332308769226,
956
+ "eval_model_preparation_time": 0.0121,
957
+ "eval_runtime": 13.0147,
958
+ "eval_samples_per_second": 1.537,
959
+ "eval_steps_per_second": 0.231,
960
+ "eval_wer": 4.012345679012346,
961
+ "step": 56
962
+ },
963
+ {
964
+ "epoch": 57.0,
965
+ "grad_norm": 0.09995821118354797,
966
+ "learning_rate": 1e-05,
967
+ "loss": 0.0132,
968
+ "step": 57
969
+ },
970
+ {
971
+ "epoch": 57.0,
972
+ "eval_loss": 0.5688786506652832,
973
+ "eval_model_preparation_time": 0.0121,
974
+ "eval_runtime": 13.0009,
975
+ "eval_samples_per_second": 1.538,
976
+ "eval_steps_per_second": 0.231,
977
+ "eval_wer": 4.012345679012346,
978
+ "step": 57
979
+ },
980
+ {
981
+ "epoch": 58.0,
982
+ "grad_norm": 0.09996328502893448,
983
+ "learning_rate": 1e-05,
984
+ "loss": 0.013,
985
+ "step": 58
986
+ },
987
+ {
988
+ "epoch": 58.0,
989
+ "eval_loss": 0.5627100467681885,
990
+ "eval_model_preparation_time": 0.0121,
991
+ "eval_runtime": 13.0166,
992
+ "eval_samples_per_second": 1.536,
993
+ "eval_steps_per_second": 0.23,
994
+ "eval_wer": 4.012345679012346,
995
+ "step": 58
996
+ },
997
+ {
998
+ "epoch": 59.0,
999
+ "grad_norm": 0.09995493292808533,
1000
+ "learning_rate": 1e-05,
1001
+ "loss": 0.0128,
1002
+ "step": 59
1003
+ },
1004
+ {
1005
+ "epoch": 59.0,
1006
+ "eval_loss": 0.5565866827964783,
1007
+ "eval_model_preparation_time": 0.0121,
1008
+ "eval_runtime": 13.0199,
1009
+ "eval_samples_per_second": 1.536,
1010
+ "eval_steps_per_second": 0.23,
1011
+ "eval_wer": 4.012345679012346,
1012
+ "step": 59
1013
+ },
1014
+ {
1015
+ "epoch": 60.0,
1016
+ "grad_norm": 0.09997319430112839,
1017
+ "learning_rate": 1e-05,
1018
+ "loss": 0.0127,
1019
+ "step": 60
1020
+ },
1021
+ {
1022
+ "epoch": 60.0,
1023
+ "eval_loss": 0.5505630373954773,
1024
+ "eval_model_preparation_time": 0.0121,
1025
+ "eval_runtime": 13.0285,
1026
+ "eval_samples_per_second": 1.535,
1027
+ "eval_steps_per_second": 0.23,
1028
+ "eval_wer": 4.012345679012346,
1029
+ "step": 60
1030
+ },
1031
+ {
1032
+ "epoch": 61.0,
1033
+ "grad_norm": 0.0999804213643074,
1034
+ "learning_rate": 1e-05,
1035
+ "loss": 0.0125,
1036
+ "step": 61
1037
+ },
1038
+ {
1039
+ "epoch": 61.0,
1040
+ "eval_loss": 0.5444908738136292,
1041
+ "eval_model_preparation_time": 0.0121,
1042
+ "eval_runtime": 13.0398,
1043
+ "eval_samples_per_second": 1.534,
1044
+ "eval_steps_per_second": 0.23,
1045
+ "eval_wer": 4.012345679012346,
1046
+ "step": 61
1047
+ },
1048
+ {
1049
+ "epoch": 62.0,
1050
+ "grad_norm": 0.10000340640544891,
1051
+ "learning_rate": 1e-05,
1052
+ "loss": 0.0123,
1053
+ "step": 62
1054
+ },
1055
+ {
1056
+ "epoch": 62.0,
1057
+ "eval_loss": 0.5383059978485107,
1058
+ "eval_model_preparation_time": 0.0121,
1059
+ "eval_runtime": 13.0216,
1060
+ "eval_samples_per_second": 1.536,
1061
+ "eval_steps_per_second": 0.23,
1062
+ "eval_wer": 4.012345679012346,
1063
+ "step": 62
1064
+ },
1065
+ {
1066
+ "epoch": 63.0,
1067
+ "grad_norm": 0.10002919286489487,
1068
+ "learning_rate": 1e-05,
1069
+ "loss": 0.0121,
1070
+ "step": 63
1071
+ },
1072
+ {
1073
+ "epoch": 63.0,
1074
+ "eval_loss": 0.5322217345237732,
1075
+ "eval_model_preparation_time": 0.0121,
1076
+ "eval_runtime": 13.027,
1077
+ "eval_samples_per_second": 1.535,
1078
+ "eval_steps_per_second": 0.23,
1079
+ "eval_wer": 4.012345679012346,
1080
+ "step": 63
1081
+ },
1082
+ {
1083
+ "epoch": 64.0,
1084
+ "grad_norm": 0.10005642473697662,
1085
+ "learning_rate": 1e-05,
1086
+ "loss": 0.0119,
1087
+ "step": 64
1088
+ },
1089
+ {
1090
+ "epoch": 64.0,
1091
+ "eval_loss": 0.5260897278785706,
1092
+ "eval_model_preparation_time": 0.0121,
1093
+ "eval_runtime": 13.0762,
1094
+ "eval_samples_per_second": 1.529,
1095
+ "eval_steps_per_second": 0.229,
1096
+ "eval_wer": 4.012345679012346,
1097
+ "step": 64
1098
+ },
1099
+ {
1100
+ "epoch": 65.0,
1101
+ "grad_norm": 0.1000811904668808,
1102
+ "learning_rate": 1e-05,
1103
+ "loss": 0.0117,
1104
+ "step": 65
1105
+ },
1106
+ {
1107
+ "epoch": 65.0,
1108
+ "eval_loss": 0.5198546648025513,
1109
+ "eval_model_preparation_time": 0.0121,
1110
+ "eval_runtime": 13.0323,
1111
+ "eval_samples_per_second": 1.535,
1112
+ "eval_steps_per_second": 0.23,
1113
+ "eval_wer": 4.012345679012346,
1114
+ "step": 65
1115
+ },
1116
+ {
1117
+ "epoch": 66.0,
1118
+ "grad_norm": 0.10011852532625198,
1119
+ "learning_rate": 1e-05,
1120
+ "loss": 0.0115,
1121
+ "step": 66
1122
+ },
1123
+ {
1124
+ "epoch": 66.0,
1125
+ "eval_loss": 0.513763964176178,
1126
+ "eval_model_preparation_time": 0.0121,
1127
+ "eval_runtime": 13.0426,
1128
+ "eval_samples_per_second": 1.533,
1129
+ "eval_steps_per_second": 0.23,
1130
+ "eval_wer": 4.012345679012346,
1131
+ "step": 66
1132
+ },
1133
+ {
1134
+ "epoch": 67.0,
1135
+ "grad_norm": 0.10016665607690811,
1136
+ "learning_rate": 1e-05,
1137
+ "loss": 0.0114,
1138
+ "step": 67
1139
+ },
1140
+ {
1141
+ "epoch": 67.0,
1142
+ "eval_loss": 0.5077674388885498,
1143
+ "eval_model_preparation_time": 0.0121,
1144
+ "eval_runtime": 13.0559,
1145
+ "eval_samples_per_second": 1.532,
1146
+ "eval_steps_per_second": 0.23,
1147
+ "eval_wer": 4.012345679012346,
1148
+ "step": 67
1149
+ },
1150
+ {
1151
+ "epoch": 68.0,
1152
+ "grad_norm": 0.10022170841693878,
1153
+ "learning_rate": 1e-05,
1154
+ "loss": 0.0112,
1155
+ "step": 68
1156
+ },
1157
+ {
1158
+ "epoch": 68.0,
1159
+ "eval_loss": 0.5016047358512878,
1160
+ "eval_model_preparation_time": 0.0121,
1161
+ "eval_runtime": 13.0439,
1162
+ "eval_samples_per_second": 1.533,
1163
+ "eval_steps_per_second": 0.23,
1164
+ "eval_wer": 4.012345679012346,
1165
+ "step": 68
1166
+ },
1167
+ {
1168
+ "epoch": 69.0,
1169
+ "grad_norm": 0.10022887587547302,
1170
+ "learning_rate": 1e-05,
1171
+ "loss": 0.011,
1172
+ "step": 69
1173
+ },
1174
+ {
1175
+ "epoch": 69.0,
1176
+ "eval_loss": 0.49558281898498535,
1177
+ "eval_model_preparation_time": 0.0121,
1178
+ "eval_runtime": 13.0145,
1179
+ "eval_samples_per_second": 1.537,
1180
+ "eval_steps_per_second": 0.231,
1181
+ "eval_wer": 4.012345679012346,
1182
+ "step": 69
1183
+ },
1184
+ {
1185
+ "epoch": 70.0,
1186
+ "grad_norm": 0.10028593987226486,
1187
+ "learning_rate": 1e-05,
1188
+ "loss": 0.0108,
1189
+ "step": 70
1190
+ },
1191
+ {
1192
+ "epoch": 70.0,
1193
+ "eval_loss": 0.4895244538784027,
1194
+ "eval_model_preparation_time": 0.0121,
1195
+ "eval_runtime": 13.0373,
1196
+ "eval_samples_per_second": 1.534,
1197
+ "eval_steps_per_second": 0.23,
1198
+ "eval_wer": 4.012345679012346,
1199
+ "step": 70
1200
+ },
1201
+ {
1202
+ "epoch": 71.0,
1203
+ "grad_norm": 0.10034992545843124,
1204
+ "learning_rate": 1e-05,
1205
+ "loss": 0.0106,
1206
+ "step": 71
1207
+ },
1208
+ {
1209
+ "epoch": 71.0,
1210
+ "eval_loss": 0.4835710823535919,
1211
+ "eval_model_preparation_time": 0.0121,
1212
+ "eval_runtime": 12.9895,
1213
+ "eval_samples_per_second": 1.54,
1214
+ "eval_steps_per_second": 0.231,
1215
+ "eval_wer": 4.012345679012346,
1216
+ "step": 71
1217
+ },
1218
+ {
1219
+ "epoch": 72.0,
1220
+ "grad_norm": 0.10041330754756927,
1221
+ "learning_rate": 1e-05,
1222
+ "loss": 0.0104,
1223
+ "step": 72
1224
+ },
1225
+ {
1226
+ "epoch": 72.0,
1227
+ "eval_loss": 0.4775284230709076,
1228
+ "eval_model_preparation_time": 0.0121,
1229
+ "eval_runtime": 13.0996,
1230
+ "eval_samples_per_second": 1.527,
1231
+ "eval_steps_per_second": 0.229,
1232
+ "eval_wer": 4.012345679012346,
1233
+ "step": 72
1234
+ },
1235
+ {
1236
+ "epoch": 73.0,
1237
+ "grad_norm": 0.10046479851007462,
1238
+ "learning_rate": 1e-05,
1239
+ "loss": 0.0102,
1240
+ "step": 73
1241
+ },
1242
+ {
1243
+ "epoch": 73.0,
1244
+ "eval_loss": 0.47157832980155945,
1245
+ "eval_model_preparation_time": 0.0121,
1246
+ "eval_runtime": 13.0539,
1247
+ "eval_samples_per_second": 1.532,
1248
+ "eval_steps_per_second": 0.23,
1249
+ "eval_wer": 4.012345679012346,
1250
+ "step": 73
1251
+ },
1252
+ {
1253
+ "epoch": 74.0,
1254
+ "grad_norm": 0.10052894800901413,
1255
+ "learning_rate": 1e-05,
1256
+ "loss": 0.01,
1257
+ "step": 74
1258
+ },
1259
+ {
1260
+ "epoch": 74.0,
1261
+ "eval_loss": 0.4655916392803192,
1262
+ "eval_model_preparation_time": 0.0121,
1263
+ "eval_runtime": 13.0131,
1264
+ "eval_samples_per_second": 1.537,
1265
+ "eval_steps_per_second": 0.231,
1266
+ "eval_wer": 4.012345679012346,
1267
+ "step": 74
1268
+ },
1269
+ {
1270
+ "epoch": 75.0,
1271
+ "grad_norm": 0.10059099644422531,
1272
+ "learning_rate": 1e-05,
1273
+ "loss": 0.0098,
1274
+ "step": 75
1275
+ },
1276
+ {
1277
+ "epoch": 75.0,
1278
+ "eval_loss": 0.4597587287425995,
1279
+ "eval_model_preparation_time": 0.0121,
1280
+ "eval_runtime": 13.0097,
1281
+ "eval_samples_per_second": 1.537,
1282
+ "eval_steps_per_second": 0.231,
1283
+ "eval_wer": 4.012345679012346,
1284
+ "step": 75
1285
+ },
1286
+ {
1287
+ "epoch": 76.0,
1288
+ "grad_norm": 0.10066922754049301,
1289
+ "learning_rate": 1e-05,
1290
+ "loss": 0.0096,
1291
+ "step": 76
1292
+ },
1293
+ {
1294
+ "epoch": 76.0,
1295
+ "eval_loss": 0.4539148807525635,
1296
+ "eval_model_preparation_time": 0.0121,
1297
+ "eval_runtime": 13.1072,
1298
+ "eval_samples_per_second": 1.526,
1299
+ "eval_steps_per_second": 0.229,
1300
+ "eval_wer": 4.012345679012346,
1301
+ "step": 76
1302
+ },
1303
+ {
1304
+ "epoch": 77.0,
1305
+ "grad_norm": 0.10073923319578171,
1306
+ "learning_rate": 1e-05,
1307
+ "loss": 0.0094,
1308
+ "step": 77
1309
+ },
1310
+ {
1311
+ "epoch": 77.0,
1312
+ "eval_loss": 0.44823265075683594,
1313
+ "eval_model_preparation_time": 0.0121,
1314
+ "eval_runtime": 13.0093,
1315
+ "eval_samples_per_second": 1.537,
1316
+ "eval_steps_per_second": 0.231,
1317
+ "eval_wer": 4.012345679012346,
1318
+ "step": 77
1319
+ },
1320
+ {
1321
+ "epoch": 78.0,
1322
+ "grad_norm": 0.1008358970284462,
1323
+ "learning_rate": 1e-05,
1324
+ "loss": 0.0091,
1325
+ "step": 78
1326
+ },
1327
+ {
1328
+ "epoch": 78.0,
1329
+ "eval_loss": 0.442667692899704,
1330
+ "eval_model_preparation_time": 0.0121,
1331
+ "eval_runtime": 13.0167,
1332
+ "eval_samples_per_second": 1.536,
1333
+ "eval_steps_per_second": 0.23,
1334
+ "eval_wer": 4.012345679012346,
1335
+ "step": 78
1336
+ },
1337
+ {
1338
+ "epoch": 79.0,
1339
+ "grad_norm": 0.10090507566928864,
1340
+ "learning_rate": 1e-05,
1341
+ "loss": 0.0089,
1342
+ "step": 79
1343
+ },
1344
+ {
1345
+ "epoch": 79.0,
1346
+ "eval_loss": 0.43717432022094727,
1347
+ "eval_model_preparation_time": 0.0121,
1348
+ "eval_runtime": 13.0261,
1349
+ "eval_samples_per_second": 1.535,
1350
+ "eval_steps_per_second": 0.23,
1351
+ "eval_wer": 4.012345679012346,
1352
+ "step": 79
1353
+ },
1354
+ {
1355
+ "epoch": 80.0,
1356
+ "grad_norm": 0.10097309201955795,
1357
+ "learning_rate": 1e-05,
1358
+ "loss": 0.0087,
1359
+ "step": 80
1360
+ },
1361
+ {
1362
+ "epoch": 80.0,
1363
+ "eval_loss": 0.43188872933387756,
1364
+ "eval_model_preparation_time": 0.0121,
1365
+ "eval_runtime": 13.0257,
1366
+ "eval_samples_per_second": 1.535,
1367
+ "eval_steps_per_second": 0.23,
1368
+ "eval_wer": 4.012345679012346,
1369
+ "step": 80
1370
+ },
1371
+ {
1372
+ "epoch": 81.0,
1373
+ "grad_norm": 0.10104646533727646,
1374
+ "learning_rate": 1e-05,
1375
+ "loss": 0.0085,
1376
+ "step": 81
1377
+ },
1378
+ {
1379
+ "epoch": 81.0,
1380
+ "eval_loss": 0.4267992079257965,
1381
+ "eval_model_preparation_time": 0.0121,
1382
+ "eval_runtime": 13.0831,
1383
+ "eval_samples_per_second": 1.529,
1384
+ "eval_steps_per_second": 0.229,
1385
+ "eval_wer": 4.012345679012346,
1386
+ "step": 81
1387
+ },
1388
+ {
1389
+ "epoch": 82.0,
1390
+ "grad_norm": 0.10112679749727249,
1391
+ "learning_rate": 1e-05,
1392
+ "loss": 0.0083,
1393
+ "step": 82
1394
+ },
1395
+ {
1396
+ "epoch": 82.0,
1397
+ "eval_loss": 0.4219636917114258,
1398
+ "eval_model_preparation_time": 0.0121,
1399
+ "eval_runtime": 13.0252,
1400
+ "eval_samples_per_second": 1.535,
1401
+ "eval_steps_per_second": 0.23,
1402
+ "eval_wer": 4.012345679012346,
1403
+ "step": 82
1404
+ },
1405
+ {
1406
+ "epoch": 83.0,
1407
+ "grad_norm": 0.10117711871862411,
1408
+ "learning_rate": 1e-05,
1409
+ "loss": 0.008,
1410
+ "step": 83
1411
+ },
1412
+ {
1413
+ "epoch": 83.0,
1414
+ "eval_loss": 0.4172472655773163,
1415
+ "eval_model_preparation_time": 0.0121,
1416
+ "eval_runtime": 13.0112,
1417
+ "eval_samples_per_second": 1.537,
1418
+ "eval_steps_per_second": 0.231,
1419
+ "eval_wer": 4.012345679012346,
1420
+ "step": 83
1421
+ },
1422
+ {
1423
+ "epoch": 84.0,
1424
+ "grad_norm": 0.1012597382068634,
1425
+ "learning_rate": 1e-05,
1426
+ "loss": 0.0078,
1427
+ "step": 84
1428
+ },
1429
+ {
1430
+ "epoch": 84.0,
1431
+ "eval_loss": 0.41286230087280273,
1432
+ "eval_model_preparation_time": 0.0121,
1433
+ "eval_runtime": 13.0229,
1434
+ "eval_samples_per_second": 1.536,
1435
+ "eval_steps_per_second": 0.23,
1436
+ "eval_wer": 4.012345679012346,
1437
+ "step": 84
1438
+ },
1439
+ {
1440
+ "epoch": 85.0,
1441
+ "grad_norm": 0.101369209587574,
1442
+ "learning_rate": 1e-05,
1443
+ "loss": 0.0076,
1444
+ "step": 85
1445
+ },
1446
+ {
1447
+ "epoch": 85.0,
1448
+ "eval_loss": 0.40860816836357117,
1449
+ "eval_model_preparation_time": 0.0121,
1450
+ "eval_runtime": 13.0992,
1451
+ "eval_samples_per_second": 1.527,
1452
+ "eval_steps_per_second": 0.229,
1453
+ "eval_wer": 4.012345679012346,
1454
+ "step": 85
1455
+ },
1456
+ {
1457
+ "epoch": 86.0,
1458
+ "grad_norm": 0.10140149295330048,
1459
+ "learning_rate": 1e-05,
1460
+ "loss": 0.0073,
1461
+ "step": 86
1462
+ },
1463
+ {
1464
+ "epoch": 86.0,
1465
+ "eval_loss": 0.4042428433895111,
1466
+ "eval_model_preparation_time": 0.0121,
1467
+ "eval_runtime": 13.03,
1468
+ "eval_samples_per_second": 1.535,
1469
+ "eval_steps_per_second": 0.23,
1470
+ "eval_wer": 4.012345679012346,
1471
+ "step": 86
1472
+ },
1473
+ {
1474
+ "epoch": 87.0,
1475
+ "grad_norm": 0.10143295675516129,
1476
+ "learning_rate": 1e-05,
1477
+ "loss": 0.0071,
1478
+ "step": 87
1479
+ },
1480
+ {
1481
+ "epoch": 87.0,
1482
+ "eval_loss": 0.3998439311981201,
1483
+ "eval_model_preparation_time": 0.0121,
1484
+ "eval_runtime": 13.005,
1485
+ "eval_samples_per_second": 1.538,
1486
+ "eval_steps_per_second": 0.231,
1487
+ "eval_wer": 4.012345679012346,
1488
+ "step": 87
1489
+ },
1490
+ {
1491
+ "epoch": 88.0,
1492
+ "grad_norm": 0.1014971062541008,
1493
+ "learning_rate": 1e-05,
1494
+ "loss": 0.0069,
1495
+ "step": 88
1496
+ },
1497
+ {
1498
+ "epoch": 88.0,
1499
+ "eval_loss": 0.39532041549682617,
1500
+ "eval_model_preparation_time": 0.0121,
1501
+ "eval_runtime": 13.0078,
1502
+ "eval_samples_per_second": 1.538,
1503
+ "eval_steps_per_second": 0.231,
1504
+ "eval_wer": 4.012345679012346,
1505
+ "step": 88
1506
+ },
1507
+ {
1508
+ "epoch": 89.0,
1509
+ "grad_norm": 0.10153479129076004,
1510
+ "learning_rate": 1e-05,
1511
+ "loss": 0.0066,
1512
+ "step": 89
1513
+ },
1514
+ {
1515
+ "epoch": 89.0,
1516
+ "eval_loss": 0.3908376693725586,
1517
+ "eval_model_preparation_time": 0.0121,
1518
+ "eval_runtime": 13.0794,
1519
+ "eval_samples_per_second": 1.529,
1520
+ "eval_steps_per_second": 0.229,
1521
+ "eval_wer": 4.012345679012346,
1522
+ "step": 89
1523
+ },
1524
+ {
1525
+ "epoch": 90.0,
1526
+ "grad_norm": 0.10166627168655396,
1527
+ "learning_rate": 1e-05,
1528
+ "loss": 0.0064,
1529
+ "step": 90
1530
+ },
1531
+ {
1532
+ "epoch": 90.0,
1533
+ "eval_loss": 0.3862927258014679,
1534
+ "eval_model_preparation_time": 0.0121,
1535
+ "eval_runtime": 13.0683,
1536
+ "eval_samples_per_second": 1.53,
1537
+ "eval_steps_per_second": 0.23,
1538
+ "eval_wer": 4.320987654320987,
1539
+ "step": 90
1540
+ },
1541
+ {
1542
+ "epoch": 91.0,
1543
+ "grad_norm": 0.10168776661157608,
1544
+ "learning_rate": 1e-05,
1545
+ "loss": 0.0062,
1546
+ "step": 91
1547
+ },
1548
+ {
1549
+ "epoch": 91.0,
1550
+ "eval_loss": 0.38115009665489197,
1551
+ "eval_model_preparation_time": 0.0121,
1552
+ "eval_runtime": 13.0067,
1553
+ "eval_samples_per_second": 1.538,
1554
+ "eval_steps_per_second": 0.231,
1555
+ "eval_wer": 4.320987654320987,
1556
+ "step": 91
1557
+ },
1558
+ {
1559
+ "epoch": 92.0,
1560
+ "grad_norm": 0.10173481702804565,
1561
+ "learning_rate": 1e-05,
1562
+ "loss": 0.0059,
1563
+ "step": 92
1564
+ },
1565
+ {
1566
+ "epoch": 92.0,
1567
+ "eval_loss": 0.37574759125709534,
1568
+ "eval_model_preparation_time": 0.0121,
1569
+ "eval_runtime": 13.045,
1570
+ "eval_samples_per_second": 1.533,
1571
+ "eval_steps_per_second": 0.23,
1572
+ "eval_wer": 4.320987654320987,
1573
+ "step": 92
1574
+ },
1575
+ {
1576
+ "epoch": 93.0,
1577
+ "grad_norm": 0.1017962321639061,
1578
+ "learning_rate": 1e-05,
1579
+ "loss": 0.0057,
1580
+ "step": 93
1581
+ },
1582
+ {
1583
+ "epoch": 93.0,
1584
+ "eval_loss": 0.3700896203517914,
1585
+ "eval_model_preparation_time": 0.0121,
1586
+ "eval_runtime": 12.9624,
1587
+ "eval_samples_per_second": 1.543,
1588
+ "eval_steps_per_second": 0.231,
1589
+ "eval_wer": 4.320987654320987,
1590
+ "step": 93
1591
+ },
1592
+ {
1593
+ "epoch": 94.0,
1594
+ "grad_norm": 0.10191880166530609,
1595
+ "learning_rate": 1e-05,
1596
+ "loss": 0.0055,
1597
+ "step": 94
1598
+ },
1599
+ {
1600
+ "epoch": 94.0,
1601
+ "eval_loss": 0.3639739453792572,
1602
+ "eval_model_preparation_time": 0.0121,
1603
+ "eval_runtime": 12.9926,
1604
+ "eval_samples_per_second": 1.539,
1605
+ "eval_steps_per_second": 0.231,
1606
+ "eval_wer": 4.320987654320987,
1607
+ "step": 94
1608
+ },
1609
+ {
1610
+ "epoch": 95.0,
1611
+ "grad_norm": 0.10210127383470535,
1612
+ "learning_rate": 1e-05,
1613
+ "loss": 0.0053,
1614
+ "step": 95
1615
+ },
1616
+ {
1617
+ "epoch": 95.0,
1618
+ "eval_loss": 0.3575873374938965,
1619
+ "eval_model_preparation_time": 0.0121,
1620
+ "eval_runtime": 12.9753,
1621
+ "eval_samples_per_second": 1.541,
1622
+ "eval_steps_per_second": 0.231,
1623
+ "eval_wer": 5.246913580246913,
1624
+ "step": 95
1625
+ },
1626
+ {
1627
+ "epoch": 96.0,
1628
+ "grad_norm": 0.10229586809873581,
1629
+ "learning_rate": 1e-05,
1630
+ "loss": 0.005,
1631
+ "step": 96
1632
+ },
1633
+ {
1634
+ "epoch": 96.0,
1635
+ "eval_loss": 0.351242333650589,
1636
+ "eval_model_preparation_time": 0.0121,
1637
+ "eval_runtime": 12.9627,
1638
+ "eval_samples_per_second": 1.543,
1639
+ "eval_steps_per_second": 0.231,
1640
+ "eval_wer": 5.246913580246913,
1641
+ "step": 96
1642
+ },
1643
+ {
1644
+ "epoch": 97.0,
1645
+ "grad_norm": 0.10258200764656067,
1646
+ "learning_rate": 1e-05,
1647
+ "loss": 0.0048,
1648
+ "step": 97
1649
+ },
1650
+ {
1651
+ "epoch": 97.0,
1652
+ "eval_loss": 0.34528884291648865,
1653
+ "eval_model_preparation_time": 0.0121,
1654
+ "eval_runtime": 12.9959,
1655
+ "eval_samples_per_second": 1.539,
1656
+ "eval_steps_per_second": 0.231,
1657
+ "eval_wer": 5.246913580246913,
1658
+ "step": 97
1659
+ },
1660
+ {
1661
+ "epoch": 98.0,
1662
+ "grad_norm": 0.1028934046626091,
1663
+ "learning_rate": 1e-05,
1664
+ "loss": 0.0046,
1665
+ "step": 98
1666
+ },
1667
+ {
1668
+ "epoch": 98.0,
1669
+ "eval_loss": 0.3395444452762604,
1670
+ "eval_model_preparation_time": 0.0121,
1671
+ "eval_runtime": 13.0387,
1672
+ "eval_samples_per_second": 1.534,
1673
+ "eval_steps_per_second": 0.23,
1674
+ "eval_wer": 5.246913580246913,
1675
+ "step": 98
1676
+ },
1677
+ {
1678
+ "epoch": 99.0,
1679
+ "grad_norm": 0.10328880697488785,
1680
+ "learning_rate": 1e-05,
1681
+ "loss": 0.0044,
1682
+ "step": 99
1683
+ },
1684
+ {
1685
+ "epoch": 99.0,
1686
+ "eval_loss": 0.33414900302886963,
1687
+ "eval_model_preparation_time": 0.0121,
1688
+ "eval_runtime": 13.0468,
1689
+ "eval_samples_per_second": 1.533,
1690
+ "eval_steps_per_second": 0.23,
1691
+ "eval_wer": 5.864197530864197,
1692
+ "step": 99
1693
+ },
1694
+ {
1695
+ "epoch": 100.0,
1696
+ "grad_norm": 0.10364117473363876,
1697
+ "learning_rate": 1e-05,
1698
+ "loss": 0.0042,
1699
+ "step": 100
1700
+ },
1701
+ {
1702
+ "epoch": 100.0,
1703
+ "eval_loss": 0.3294712007045746,
1704
+ "eval_model_preparation_time": 0.0121,
1705
+ "eval_runtime": 13.0722,
1706
+ "eval_samples_per_second": 1.53,
1707
+ "eval_steps_per_second": 0.229,
1708
+ "eval_wer": 5.864197530864197,
1709
+ "step": 100
1710
+ }
1711
+ ],
1712
+ "logging_steps": 1,
1713
+ "max_steps": 100,
1714
+ "num_input_tokens_seen": 0,
1715
+ "num_train_epochs": 100,
1716
+ "save_steps": 1000,
1717
+ "stateful_callbacks": {
1718
+ "EarlyStoppingCallback": {
1719
+ "args": {
1720
+ "early_stopping_patience": 3,
1721
+ "early_stopping_threshold": 0.0
1722
+ },
1723
+ "attributes": {
1724
+ "early_stopping_patience_counter": 0
1725
+ }
1726
+ },
1727
+ "TrainerControl": {
1728
+ "args": {
1729
+ "should_epoch_stop": false,
1730
+ "should_evaluate": false,
1731
+ "should_log": false,
1732
+ "should_save": true,
1733
+ "should_training_stop": true
1734
+ },
1735
+ "attributes": {}
1736
+ }
1737
+ },
1738
+ "total_flos": 6.12362944512e+17,
1739
+ "train_batch_size": 16,
1740
+ "trial_name": null,
1741
+ "trial_params": null
1742
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcebb509612029e0a3f035bd0445279e2aaf89270c378a9491d79b32a080d433
3
+ size 5496