EnvyIrys committed on
Commit 81403bd · verified · 1 parent: f1cde0b

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,61 @@
+ {
+ "_name_or_path": "openai/whisper-medium",
+ "activation_dropout": 0.0,
+ "activation_function": "gelu",
+ "apply_spec_augment": false,
+ "architectures": [
+ "WhisperForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "begin_suppress_tokens": null,
+ "bos_token_id": 50257,
+ "classifier_proj_size": 256,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 24,
+ "decoder_start_token_id": 50258,
+ "dropout": 0.0,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 24,
+ "eos_token_id": 50257,
+ "forced_decoder_ids": [
+ [
+ 1,
+ 50269
+ ],
+ [
+ 2,
+ 50359
+ ],
+ [
+ 3,
+ 50363
+ ]
+ ],
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "max_length": null,
+ "max_source_positions": 1500,
+ "max_target_positions": 448,
+ "median_filter_width": 7,
+ "model_type": "whisper",
+ "num_hidden_layers": 24,
+ "num_mel_bins": 80,
+ "pad_token_id": 50257,
+ "scale_embedding": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.45.2",
+ "use_cache": true,
+ "use_weighted_layer_sum": false,
+ "vocab_size": 51865
+ }
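The config above is a fine-tune of openai/whisper-medium (24 encoder and decoder layers, d_model 1024, 80 mel bins), with forced_decoder_ids pinning the decoder prompt to tokens 50269 (<|pl|>), 50359 (transcribe) and 50363 (no timestamps). A minimal sketch of loading the checkpoint with transformers follows; the checkpoint path is a placeholder, and since this commit contains no tokenizer files, the processor is taken from the base openai/whisper-medium model.

# Minimal sketch, not part of the upload: load the checkpoint with transformers.
# "path/to/checkpoint" is a placeholder for this repo's local folder or Hub ID.
from transformers import WhisperForConditionalGeneration, WhisperProcessor

model = WhisperForConditionalGeneration.from_pretrained("path/to/checkpoint")
# The commit has no tokenizer files, so reuse the base model's processor.
processor = WhisperProcessor.from_pretrained("openai/whisper-medium")

print(model.config.d_model, model.config.encoder_layers)  # 1024 24, per config.json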
generation_config.json ADDED
@@ -0,0 +1,248 @@
+ {
+ "alignment_heads": [
+ [
+ 13,
+ 15
+ ],
+ [
+ 15,
+ 4
+ ],
+ [
+ 15,
+ 15
+ ],
+ [
+ 16,
+ 1
+ ],
+ [
+ 20,
+ 0
+ ],
+ [
+ 23,
+ 4
+ ]
+ ],
+ "begin_suppress_tokens": [
+ 220,
+ 50257
+ ],
+ "bos_token_id": 50257,
+ "decoder_start_token_id": 50258,
+ "eos_token_id": 50257,
+ "forced_decoder_ids": [
+ [
+ 1,
+ null
+ ],
+ [
+ 2,
+ 50359
+ ]
+ ],
+ "is_multilingual": true,
+ "lang_to_id": {
+ "<|af|>": 50327,
+ "<|am|>": 50334,
+ "<|ar|>": 50272,
+ "<|as|>": 50350,
+ "<|az|>": 50304,
+ "<|ba|>": 50355,
+ "<|be|>": 50330,
+ "<|bg|>": 50292,
+ "<|bn|>": 50302,
+ "<|bo|>": 50347,
+ "<|br|>": 50309,
+ "<|bs|>": 50315,
+ "<|ca|>": 50270,
+ "<|cs|>": 50283,
+ "<|cy|>": 50297,
+ "<|da|>": 50285,
+ "<|de|>": 50261,
+ "<|el|>": 50281,
+ "<|en|>": 50259,
+ "<|es|>": 50262,
+ "<|et|>": 50307,
+ "<|eu|>": 50310,
+ "<|fa|>": 50300,
+ "<|fi|>": 50277,
+ "<|fo|>": 50338,
+ "<|fr|>": 50265,
+ "<|gl|>": 50319,
+ "<|gu|>": 50333,
+ "<|haw|>": 50352,
+ "<|ha|>": 50354,
+ "<|he|>": 50279,
+ "<|hi|>": 50276,
+ "<|hr|>": 50291,
+ "<|ht|>": 50339,
+ "<|hu|>": 50286,
+ "<|hy|>": 50312,
+ "<|id|>": 50275,
+ "<|is|>": 50311,
+ "<|it|>": 50274,
+ "<|ja|>": 50266,
+ "<|jw|>": 50356,
+ "<|ka|>": 50329,
+ "<|kk|>": 50316,
+ "<|km|>": 50323,
+ "<|kn|>": 50306,
+ "<|ko|>": 50264,
+ "<|la|>": 50294,
+ "<|lb|>": 50345,
+ "<|ln|>": 50353,
+ "<|lo|>": 50336,
+ "<|lt|>": 50293,
+ "<|lv|>": 50301,
+ "<|mg|>": 50349,
+ "<|mi|>": 50295,
+ "<|mk|>": 50308,
+ "<|ml|>": 50296,
+ "<|mn|>": 50314,
+ "<|mr|>": 50320,
+ "<|ms|>": 50282,
+ "<|mt|>": 50343,
+ "<|my|>": 50346,
+ "<|ne|>": 50313,
+ "<|nl|>": 50271,
+ "<|nn|>": 50342,
+ "<|no|>": 50288,
+ "<|oc|>": 50328,
+ "<|pa|>": 50321,
+ "<|pl|>": 50269,
+ "<|ps|>": 50340,
+ "<|pt|>": 50267,
+ "<|ro|>": 50284,
+ "<|ru|>": 50263,
+ "<|sa|>": 50344,
+ "<|sd|>": 50332,
+ "<|si|>": 50322,
+ "<|sk|>": 50298,
+ "<|sl|>": 50305,
+ "<|sn|>": 50324,
+ "<|so|>": 50326,
+ "<|sq|>": 50317,
+ "<|sr|>": 50303,
+ "<|su|>": 50357,
+ "<|sv|>": 50273,
+ "<|sw|>": 50318,
+ "<|ta|>": 50287,
+ "<|te|>": 50299,
+ "<|tg|>": 50331,
+ "<|th|>": 50289,
+ "<|tk|>": 50341,
+ "<|tl|>": 50348,
+ "<|tr|>": 50268,
+ "<|tt|>": 50351,
+ "<|uk|>": 50280,
+ "<|ur|>": 50290,
+ "<|uz|>": 50337,
+ "<|vi|>": 50278,
+ "<|yi|>": 50335,
+ "<|yo|>": 50325,
+ "<|zh|>": 50260
+ },
+ "max_initial_timestamp_index": 50,
+ "max_length": 448,
+ "no_timestamps_token_id": 50363,
+ "pad_token_id": 50257,
+ "prev_sot_token_id": 50361,
+ "return_timestamps": false,
+ "suppress_tokens": [
+ 1,
+ 2,
+ 7,
+ 8,
+ 9,
+ 10,
+ 14,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 31,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 90,
+ 91,
+ 92,
+ 93,
+ 359,
+ 503,
+ 522,
+ 542,
+ 873,
+ 893,
+ 902,
+ 918,
+ 922,
+ 931,
+ 1350,
+ 1853,
+ 1982,
+ 2460,
+ 2627,
+ 3246,
+ 3253,
+ 3268,
+ 3536,
+ 3846,
+ 3961,
+ 4183,
+ 4667,
+ 6585,
+ 6647,
+ 7273,
+ 9061,
+ 9383,
+ 10428,
+ 10929,
+ 11938,
+ 12033,
+ 12331,
+ 12562,
+ 13793,
+ 14157,
+ 14635,
+ 15265,
+ 15618,
+ 16553,
+ 16604,
+ 18362,
+ 18956,
+ 20075,
+ 21675,
+ 22520,
+ 26130,
+ 26161,
+ 26435,
+ 28279,
+ 29464,
+ 31650,
+ 32302,
+ 32470,
+ 36865,
+ 42863,
+ 47425,
+ 49870,
+ 50254,
+ 50258,
+ 50358,
+ 50359,
+ 50360,
+ 50361,
+ 50362
+ ],
+ "task_to_id": {
+ "transcribe": 50359,
+ "translate": 50358
+ },
+ "transformers_version": "4.45.2"
+ }
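Unlike config.json, the generation config leaves slot 1 of forced_decoder_ids null, so the language token is filled in (or auto-detected) at generation time, while slot 2 stays fixed to 50359 (transcribe); lang_to_id and task_to_id map names to those token ids. A hedged sketch of building the same decoder prompt with the stock transformers processor API:

# Sketch: build the decoder prompt that generation_config.json describes.
from transformers import WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
prompt_ids = processor.get_decoder_prompt_ids(language="polish", task="transcribe")
print(prompt_ids)  # [(1, 50269), (2, 50359), (3, 50363)] -- lang_to_id, task_to_id, no_timestamps
# model.generate(input_features, forced_decoder_ids=prompt_ids)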
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dddbec4e23fd304a856cabd5fd404a83334104d5e82f86520e78dfce51396468
+ size 3055544304
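The three lines above are a Git LFS pointer, not the weights themselves; the actual ~3.06 GB safetensors file lives on the Hub and is resolved on download. A sketch using huggingface_hub (the repo ID below is a placeholder, not taken from this commit):

# Sketch: resolve the LFS pointer to the real weights file via huggingface_hub.
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="EnvyIrys/your-repo-name", filename="model.safetensors")
print(path)  # local cache path of the ~3.06 GB file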
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a417e901fc111123acf5d0bb0897cd0a465cda5f35d9e5af998d7fb61bf698dc
+ size 6099368783
preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "chunk_length": 30,
+ "feature_extractor_type": "WhisperFeatureExtractor",
+ "feature_size": 80,
+ "hop_length": 160,
+ "n_fft": 400,
+ "n_samples": 480000,
+ "nb_max_frames": 3000,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "processor_class": "WhisperProcessor",
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+ }
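These feature-extractor settings are the standard Whisper front end: 16 kHz audio, 30 s windows (n_samples = 30 × 16000 = 480000), 80 mel bins, and a hop of 160 samples, which gives nb_max_frames = 480000 / 160 = 3000. A small sketch, assuming the usual transformers feature extractor:

# Sketch: produce the (80 x 3000) log-mel features the model consumes.
import numpy as np
from transformers import WhisperFeatureExtractor

fe = WhisperFeatureExtractor(feature_size=80, sampling_rate=16000,
                             hop_length=160, chunk_length=30, n_fft=400)
audio = np.zeros(16000 * 5, dtype=np.float32)   # 5 s of silence as a stand-in
feats = fe(audio, sampling_rate=16000, return_tensors="np")
print(feats.input_features.shape)               # (1, 80, 3000): padded to 30 s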
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb9797f2a717da7201b345659d92175ed5a003888546563c3c9c54c29ae40372
+ size 14503
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ade1c95f4829471f6f08ab8a27d7b178b8054745ff76da84224ce6a8b782a962
+ size 623
trainer_state.json ADDED
@@ -0,0 +1,505 @@
+ {
+ "best_metric": 0.15666962437148774,
+ "best_model_checkpoint": "results15/checkpoint-3000",
+ "epoch": 0.256,
+ "eval_steps": 500,
+ "global_step": 4000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0064,
+ "grad_norm": 4.8001484870910645,
+ "learning_rate": 1.9200000000000003e-06,
+ "loss": 1.3393,
+ "step": 100
+ },
+ {
+ "epoch": 0.0128,
+ "grad_norm": 3.706824779510498,
+ "learning_rate": 3.920000000000001e-06,
+ "loss": 0.5176,
+ "step": 200
+ },
+ {
+ "epoch": 0.0192,
+ "grad_norm": 3.8430540561676025,
+ "learning_rate": 5.92e-06,
+ "loss": 0.2587,
+ "step": 300
+ },
+ {
+ "epoch": 0.0256,
+ "grad_norm": 3.9047493934631348,
+ "learning_rate": 7.92e-06,
+ "loss": 0.2493,
+ "step": 400
+ },
+ {
+ "epoch": 0.032,
+ "grad_norm": 3.7892723083496094,
+ "learning_rate": 9.920000000000002e-06,
+ "loss": 0.2446,
+ "step": 500
+ },
+ {
+ "epoch": 0.032,
+ "eval_test1_cer": 0.1596436118028828,
+ "eval_test1_cer_norm": 0.14118874332848133,
+ "eval_test1_loss": 0.22201526165008545,
+ "eval_test1_runtime": 1157.1217,
+ "eval_test1_samples_per_second": 2.161,
+ "eval_test1_steps_per_second": 0.068,
+ "eval_test1_wer": 0.3501922508133688,
+ "eval_test1_wer_norm": 0.27503187357310166,
+ "step": 500
+ },
+ {
+ "epoch": 0.032,
+ "eval_test2_cer": 0.34475821649329114,
+ "eval_test2_cer_norm": 0.32302360141569386,
+ "eval_test2_loss": 0.3860654830932617,
+ "eval_test2_runtime": 1327.3073,
+ "eval_test2_samples_per_second": 1.884,
+ "eval_test2_steps_per_second": 0.06,
+ "eval_test2_wer": 0.5888413010765134,
+ "eval_test2_wer_norm": 0.5090455757856894,
+ "step": 500
+ },
+ {
+ "epoch": 0.0384,
+ "grad_norm": 4.102191925048828,
+ "learning_rate": 9.936528925619836e-06,
+ "loss": 0.2387,
+ "step": 600
+ },
+ {
+ "epoch": 0.0448,
+ "grad_norm": 3.395754337310791,
+ "learning_rate": 9.870413223140496e-06,
+ "loss": 0.2333,
+ "step": 700
+ },
+ {
+ "epoch": 0.0512,
+ "grad_norm": 3.3210232257843018,
+ "learning_rate": 9.804297520661158e-06,
+ "loss": 0.2296,
+ "step": 800
+ },
+ {
+ "epoch": 0.0576,
+ "grad_norm": 3.533163070678711,
+ "learning_rate": 9.738181818181818e-06,
+ "loss": 0.2235,
+ "step": 900
+ },
+ {
+ "epoch": 0.064,
+ "grad_norm": 3.154200315475464,
+ "learning_rate": 9.67206611570248e-06,
+ "loss": 0.2268,
+ "step": 1000
+ },
+ {
+ "epoch": 0.064,
+ "eval_test1_cer": 0.06988151837597162,
+ "eval_test1_cer_norm": 0.05281416787967006,
+ "eval_test1_loss": 0.2057729810476303,
+ "eval_test1_runtime": 1040.3745,
+ "eval_test1_samples_per_second": 2.403,
+ "eval_test1_steps_per_second": 0.076,
+ "eval_test1_wer": 0.20041407867494823,
+ "eval_test1_wer_norm": 0.1341062057105583,
+ "step": 1000
+ },
+ {
+ "epoch": 0.064,
+ "eval_test2_cer": 0.17753561736770693,
+ "eval_test2_cer_norm": 0.1456756809604818,
+ "eval_test2_loss": 0.35842248797416687,
+ "eval_test2_runtime": 1152.2398,
+ "eval_test2_samples_per_second": 2.17,
+ "eval_test2_steps_per_second": 0.069,
+ "eval_test2_wer": 0.36184743604583863,
+ "eval_test2_wer_norm": 0.28771889133712164,
+ "step": 1000
+ },
+ {
+ "epoch": 0.0704,
+ "grad_norm": 4.132891654968262,
+ "learning_rate": 9.605950413223142e-06,
+ "loss": 0.2234,
+ "step": 1100
+ },
+ {
+ "epoch": 0.0768,
+ "grad_norm": 3.4513018131256104,
+ "learning_rate": 9.539834710743802e-06,
+ "loss": 0.2218,
+ "step": 1200
+ },
+ {
+ "epoch": 0.0832,
+ "grad_norm": 3.2677953243255615,
+ "learning_rate": 9.473719008264464e-06,
+ "loss": 0.2178,
+ "step": 1300
+ },
+ {
+ "epoch": 0.0896,
+ "grad_norm": 2.8952624797821045,
+ "learning_rate": 9.407603305785124e-06,
+ "loss": 0.2161,
+ "step": 1400
+ },
+ {
+ "epoch": 0.096,
+ "grad_norm": 3.3220458030700684,
+ "learning_rate": 9.341487603305786e-06,
+ "loss": 0.209,
+ "step": 1500
+ },
+ {
+ "epoch": 0.096,
+ "eval_test1_cer": 0.061481208965361106,
+ "eval_test1_cer_norm": 0.04084910237748666,
+ "eval_test1_loss": 0.19823798537254333,
+ "eval_test1_runtime": 1030.8083,
+ "eval_test1_samples_per_second": 2.425,
+ "eval_test1_steps_per_second": 0.077,
+ "eval_test1_wer": 0.1774622892635315,
+ "eval_test1_wer_norm": 0.11234322649509296,
+ "step": 1500
+ },
+ {
+ "epoch": 0.096,
+ "eval_test2_cer": 0.11561981757877281,
+ "eval_test2_cer_norm": 0.09326176648872725,
+ "eval_test2_loss": 0.34961584210395813,
+ "eval_test2_runtime": 1044.2589,
+ "eval_test2_samples_per_second": 2.394,
+ "eval_test2_steps_per_second": 0.076,
+ "eval_test2_wer": 0.25847899062391483,
+ "eval_test2_wer_norm": 0.18728980633190306,
+ "step": 1500
+ },
+ {
+ "epoch": 0.1024,
+ "grad_norm": 3.133871555328369,
+ "learning_rate": 9.275371900826448e-06,
+ "loss": 0.2173,
+ "step": 1600
+ },
+ {
+ "epoch": 0.1088,
+ "grad_norm": 3.023627758026123,
+ "learning_rate": 9.209917355371902e-06,
+ "loss": 0.2098,
+ "step": 1700
+ },
+ {
+ "epoch": 0.1152,
+ "grad_norm": 3.384046792984009,
+ "learning_rate": 9.143801652892564e-06,
+ "loss": 0.2085,
+ "step": 1800
+ },
+ {
+ "epoch": 0.1216,
+ "grad_norm": 4.177057266235352,
+ "learning_rate": 9.077685950413224e-06,
+ "loss": 0.2057,
+ "step": 1900
+ },
+ {
+ "epoch": 0.128,
+ "grad_norm": 3.657303810119629,
+ "learning_rate": 9.011570247933886e-06,
+ "loss": 0.2073,
+ "step": 2000
+ },
+ {
+ "epoch": 0.128,
+ "eval_test1_cer": 0.06908912534903026,
+ "eval_test1_cer_norm": 0.04828723920426977,
+ "eval_test1_loss": 0.18983058631420135,
+ "eval_test1_runtime": 1097.0314,
+ "eval_test1_samples_per_second": 2.279,
+ "eval_test1_steps_per_second": 0.072,
+ "eval_test1_wer": 0.1898846495119787,
+ "eval_test1_wer_norm": 0.128057639280102,
+ "step": 2000
+ },
+ {
+ "epoch": 0.128,
+ "eval_test2_cer": 0.1378665385195236,
+ "eval_test2_cer_norm": 0.1097016092763145,
+ "eval_test2_loss": 0.33988869190216064,
+ "eval_test2_runtime": 1142.2555,
+ "eval_test2_samples_per_second": 2.189,
+ "eval_test2_steps_per_second": 0.069,
+ "eval_test2_wer": 0.2811957402477139,
+ "eval_test2_wer_norm": 0.21402064246781863,
+ "step": 2000
+ },
+ {
+ "epoch": 0.1344,
+ "grad_norm": 2.9096293449401855,
+ "learning_rate": 8.945454545454546e-06,
+ "loss": 0.208,
+ "step": 2100
+ },
+ {
+ "epoch": 0.1408,
+ "grad_norm": 3.0557897090911865,
+ "learning_rate": 8.879338842975208e-06,
+ "loss": 0.2146,
+ "step": 2200
+ },
+ {
+ "epoch": 0.1472,
+ "grad_norm": 3.3298580646514893,
+ "learning_rate": 8.81322314049587e-06,
+ "loss": 0.2035,
+ "step": 2300
+ },
+ {
+ "epoch": 0.1536,
+ "grad_norm": 2.332103729248047,
+ "learning_rate": 8.74710743801653e-06,
+ "loss": 0.196,
+ "step": 2400
+ },
+ {
+ "epoch": 0.16,
+ "grad_norm": 2.920443534851074,
+ "learning_rate": 8.680991735537191e-06,
+ "loss": 0.1982,
+ "step": 2500
+ },
+ {
+ "epoch": 0.16,
+ "eval_test1_cer": 0.06446211606671194,
+ "eval_test1_cer_norm": 0.04428918000970403,
+ "eval_test1_loss": 0.18459410965442657,
+ "eval_test1_runtime": 1086.8024,
+ "eval_test1_samples_per_second": 2.3,
+ "eval_test1_steps_per_second": 0.073,
+ "eval_test1_wer": 0.17775805974563738,
+ "eval_test1_wer_norm": 0.11643490378628399,
+ "step": 2500
+ },
+ {
+ "epoch": 0.16,
+ "eval_test2_cer": 0.11876224935926429,
+ "eval_test2_cer_norm": 0.09257249564928335,
+ "eval_test2_loss": 0.33487755060195923,
+ "eval_test2_runtime": 1110.7242,
+ "eval_test2_samples_per_second": 2.251,
+ "eval_test2_steps_per_second": 0.071,
+ "eval_test2_wer": 0.2543986572519968,
+ "eval_test2_wer_norm": 0.1844195755537516,
+ "step": 2500
+ },
+ {
+ "epoch": 0.1664,
+ "grad_norm": 2.761300802230835,
+ "learning_rate": 8.614876033057852e-06,
+ "loss": 0.1969,
+ "step": 2600
+ },
+ {
+ "epoch": 0.1728,
+ "grad_norm": 2.2639405727386475,
+ "learning_rate": 8.548760330578513e-06,
+ "loss": 0.2011,
+ "step": 2700
+ },
+ {
+ "epoch": 0.1792,
+ "grad_norm": 2.7103681564331055,
+ "learning_rate": 8.482644628099175e-06,
+ "loss": 0.1911,
+ "step": 2800
+ },
+ {
+ "epoch": 0.1856,
+ "grad_norm": 3.4355201721191406,
+ "learning_rate": 8.416528925619835e-06,
+ "loss": 0.1915,
+ "step": 2900
+ },
+ {
+ "epoch": 0.192,
+ "grad_norm": 2.784461259841919,
+ "learning_rate": 8.350413223140497e-06,
+ "loss": 0.1962,
+ "step": 3000
+ },
+ {
+ "epoch": 0.192,
+ "eval_test1_cer": 0.052505471285186026,
+ "eval_test1_cer_norm": 0.03515769044153324,
+ "eval_test1_loss": 0.17998091876506805,
+ "eval_test1_runtime": 1020.463,
+ "eval_test1_samples_per_second": 2.45,
+ "eval_test1_steps_per_second": 0.077,
+ "eval_test1_wer": 0.15666962437148774,
+ "eval_test1_wer_norm": 0.09473122424170546,
+ "step": 3000
+ },
+ {
+ "epoch": 0.192,
+ "eval_test2_cer": 0.11729232624755012,
+ "eval_test2_cer_norm": 0.09097886236092372,
+ "eval_test2_loss": 0.3286541700363159,
+ "eval_test2_runtime": 1052.1877,
+ "eval_test2_samples_per_second": 2.376,
+ "eval_test2_steps_per_second": 0.075,
+ "eval_test2_wer": 0.2477427943048964,
+ "eval_test2_wer_norm": 0.17702655688275543,
+ "step": 3000
+ },
+ {
+ "epoch": 0.1984,
+ "grad_norm": 2.9920506477355957,
+ "learning_rate": 8.284297520661157e-06,
+ "loss": 0.1916,
+ "step": 3100
+ },
+ {
+ "epoch": 0.2048,
+ "grad_norm": 2.529097318649292,
+ "learning_rate": 8.21818181818182e-06,
+ "loss": 0.1909,
+ "step": 3200
+ },
+ {
+ "epoch": 0.2112,
+ "grad_norm": 2.646070718765259,
+ "learning_rate": 8.152066115702481e-06,
+ "loss": 0.1904,
+ "step": 3300
+ },
+ {
+ "epoch": 0.2176,
+ "grad_norm": 3.047825336456299,
+ "learning_rate": 8.085950413223141e-06,
+ "loss": 0.1935,
+ "step": 3400
+ },
+ {
+ "epoch": 0.224,
+ "grad_norm": 2.928596258163452,
+ "learning_rate": 8.019834710743803e-06,
+ "loss": 0.1943,
+ "step": 3500
+ },
+ {
+ "epoch": 0.224,
+ "eval_test1_cer": 0.06426873443513698,
+ "eval_test1_cer_norm": 0.041746724890829694,
+ "eval_test1_loss": 0.17650315165519714,
+ "eval_test1_runtime": 1003.7308,
+ "eval_test1_samples_per_second": 2.491,
+ "eval_test1_steps_per_second": 0.079,
+ "eval_test1_wer": 0.17329192546583852,
+ "eval_test1_wer_norm": 0.10694695644439173,
+ "step": 3500
+ },
+ {
+ "epoch": 0.224,
+ "eval_test2_cer": 0.13343792401628224,
+ "eval_test2_cer_norm": 0.09719207680725836,
+ "eval_test2_loss": 0.32644087076187134,
+ "eval_test2_runtime": 1081.9218,
+ "eval_test2_samples_per_second": 2.311,
+ "eval_test2_steps_per_second": 0.073,
+ "eval_test2_wer": 0.26533742331288346,
+ "eval_test2_wer_norm": 0.1966542966484982,
+ "step": 3500
+ },
+ {
+ "epoch": 0.2304,
+ "grad_norm": 2.7711305618286133,
+ "learning_rate": 7.953719008264463e-06,
+ "loss": 0.196,
+ "step": 3600
+ },
+ {
+ "epoch": 0.2368,
+ "grad_norm": 2.9088964462280273,
+ "learning_rate": 7.888264462809919e-06,
+ "loss": 0.1899,
+ "step": 3700
+ },
+ {
+ "epoch": 0.2432,
+ "grad_norm": 2.884896993637085,
+ "learning_rate": 7.822148760330579e-06,
+ "loss": 0.1938,
+ "step": 3800
+ },
+ {
+ "epoch": 0.2496,
+ "grad_norm": 2.511730432510376,
+ "learning_rate": 7.75603305785124e-06,
+ "loss": 0.1872,
+ "step": 3900
+ },
+ {
+ "epoch": 0.256,
+ "grad_norm": 2.930680990219116,
+ "learning_rate": 7.689917355371903e-06,
+ "loss": 0.1894,
+ "step": 4000
+ },
+ {
+ "epoch": 0.256,
+ "eval_test1_cer": 0.071758735189797,
+ "eval_test1_cer_norm": 0.047394468704512374,
+ "eval_test1_loss": 0.1755101978778839,
+ "eval_test1_runtime": 1295.9749,
+ "eval_test1_samples_per_second": 1.929,
+ "eval_test1_steps_per_second": 0.061,
+ "eval_test1_wer": 0.18352558414670217,
+ "eval_test1_wer_norm": 0.12132712663444718,
+ "step": 4000
+ },
+ {
+ "epoch": 0.256,
+ "eval_test2_cer": 0.13147802653399668,
+ "eval_test2_cer_norm": 0.10113216401712911,
+ "eval_test2_loss": 0.32261422276496887,
+ "eval_test2_runtime": 1380.4258,
+ "eval_test2_samples_per_second": 1.811,
+ "eval_test2_steps_per_second": 0.057,
+ "eval_test2_wer": 0.2693888181502489,
+ "eval_test2_wer_norm": 0.19903165951524993,
+ "step": 4000
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 15625,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 9223372036854775807,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.3063742816256e+20,
+ "train_batch_size": 32,
+ "trial_name": null,
+ "trial_params": null
+ }
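The trainer state logs training loss every 100 steps and evaluates two test sets every 500 steps; the best checkpoint so far is results15/checkpoint-3000, where eval_test1_wer equals the recorded best_metric (0.1567). A sketch for pulling the WER curve out of the file:

# Sketch: extract the test1 WER curve and the best metric from trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

wer_curve = [(e["step"], e["eval_test1_wer"])
             for e in state["log_history"] if "eval_test1_wer" in e]
print(wer_curve)             # [(500, 0.3502...), (1000, 0.2004...), ..., (4000, 0.1835...)]
print(state["best_metric"])  # 0.1566... = eval_test1_wer at checkpoint-3000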
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11c5247ce7036864a2b251a751a9841aee3e1371b95e5607defbd0d3804d9892
+ size 4911