linshoufan committed on
Commit
92a0573
·
verified ·
1 Parent(s): 6d19c64

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f2356b224b2b3b49bdabac4a3bc65fa8bbafccefee46eb047ebe3a9c43d31ff
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7689f46088a5cec225dadabb3934d0b9761fa87bb74c4aefc8f88894ccf100b
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b033003fcf6e5b455d23265b57c5d28176b7d58184de12ca2ee4a174ea97ca33
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912ee0a66a6adc271f74f3d1dcf148bd14457316cea493fa2e1cf64d5a6a6309
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9203abee247094b80f8593f8b0ff02ca19083755923e560ee1aad7ddcb2a175
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e82f96b77a35761cfa17e364b44b3b766596ff201963b0dda148aa2e1b89fb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad91f4a11954d1328db58fecd37f210630b64fe21a413fb880e5b1a7f88816c3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e691e2357f41aae54a4940e81c3678665984cf02110e0bb9f9a240c04290d15
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 55.83411121482864,
3
- "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-500",
4
- "epoch": 0.1607200257152041,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -156,6 +156,155 @@
156
  "eval_samples_per_second": 2.461,
157
  "eval_steps_per_second": 0.308,
158
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  }
160
  ],
161
  "logging_steps": 25,
@@ -163,7 +312,7 @@
163
  "num_input_tokens_seen": 0,
164
  "num_train_epochs": 1,
165
  "save_steps": 500,
166
- "total_flos": 2.30868320256e+18,
167
  "train_batch_size": 16,
168
  "trial_name": null,
169
  "trial_params": null
 
1
  {
2
+ "best_metric": 41.15215362048273,
3
+ "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-1000",
4
+ "epoch": 0.3214400514304082,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
156
  "eval_samples_per_second": 2.461,
157
  "eval_steps_per_second": 0.308,
158
  "step": 500
159
+ },
160
+ {
161
+ "epoch": 0.17,
162
+ "grad_norm": 13.947765350341797,
163
+ "learning_rate": 8.588508801062771e-06,
164
+ "loss": 0.7784,
165
+ "step": 525
166
+ },
167
+ {
168
+ "epoch": 0.18,
169
+ "grad_norm": 11.999704360961914,
170
+ "learning_rate": 8.50547990700764e-06,
171
+ "loss": 0.7646,
172
+ "step": 550
173
+ },
174
+ {
175
+ "epoch": 0.18,
176
+ "grad_norm": 12.103652954101562,
177
+ "learning_rate": 8.422451012952508e-06,
178
+ "loss": 0.7001,
179
+ "step": 575
180
+ },
181
+ {
182
+ "epoch": 0.19,
183
+ "grad_norm": 13.490057945251465,
184
+ "learning_rate": 8.339422118897376e-06,
185
+ "loss": 0.7941,
186
+ "step": 600
187
+ },
188
+ {
189
+ "epoch": 0.2,
190
+ "grad_norm": 13.992444038391113,
191
+ "learning_rate": 8.256393224842247e-06,
192
+ "loss": 0.6561,
193
+ "step": 625
194
+ },
195
+ {
196
+ "epoch": 0.21,
197
+ "grad_norm": 14.403618812561035,
198
+ "learning_rate": 8.173364330787115e-06,
199
+ "loss": 0.6618,
200
+ "step": 650
201
+ },
202
+ {
203
+ "epoch": 0.22,
204
+ "grad_norm": 12.38306713104248,
205
+ "learning_rate": 8.090335436731984e-06,
206
+ "loss": 0.7515,
207
+ "step": 675
208
+ },
209
+ {
210
+ "epoch": 0.23,
211
+ "grad_norm": 13.88232135772705,
212
+ "learning_rate": 8.007306542676852e-06,
213
+ "loss": 0.6823,
214
+ "step": 700
215
+ },
216
+ {
217
+ "epoch": 0.23,
218
+ "grad_norm": 14.506720542907715,
219
+ "learning_rate": 7.92427764862172e-06,
220
+ "loss": 0.662,
221
+ "step": 725
222
+ },
223
+ {
224
+ "epoch": 0.24,
225
+ "grad_norm": 11.101289749145508,
226
+ "learning_rate": 7.841248754566589e-06,
227
+ "loss": 0.6974,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 0.25,
232
+ "grad_norm": 10.762197494506836,
233
+ "learning_rate": 7.758219860511459e-06,
234
+ "loss": 0.6643,
235
+ "step": 775
236
+ },
237
+ {
238
+ "epoch": 0.26,
239
+ "grad_norm": 14.123621940612793,
240
+ "learning_rate": 7.675190966456327e-06,
241
+ "loss": 0.6878,
242
+ "step": 800
243
+ },
244
+ {
245
+ "epoch": 0.27,
246
+ "grad_norm": 13.668756484985352,
247
+ "learning_rate": 7.592162072401196e-06,
248
+ "loss": 0.6102,
249
+ "step": 825
250
+ },
251
+ {
252
+ "epoch": 0.27,
253
+ "grad_norm": 13.39156723022461,
254
+ "learning_rate": 7.509133178346065e-06,
255
+ "loss": 0.6133,
256
+ "step": 850
257
+ },
258
+ {
259
+ "epoch": 0.28,
260
+ "grad_norm": 11.687459945678711,
261
+ "learning_rate": 7.426104284290934e-06,
262
+ "loss": 0.6139,
263
+ "step": 875
264
+ },
265
+ {
266
+ "epoch": 0.29,
267
+ "grad_norm": 14.004112243652344,
268
+ "learning_rate": 7.343075390235803e-06,
269
+ "loss": 0.6257,
270
+ "step": 900
271
+ },
272
+ {
273
+ "epoch": 0.3,
274
+ "grad_norm": 13.317120552062988,
275
+ "learning_rate": 7.260046496180671e-06,
276
+ "loss": 0.6616,
277
+ "step": 925
278
+ },
279
+ {
280
+ "epoch": 0.31,
281
+ "grad_norm": 13.344803810119629,
282
+ "learning_rate": 7.17701760212554e-06,
283
+ "loss": 0.6086,
284
+ "step": 950
285
+ },
286
+ {
287
+ "epoch": 0.31,
288
+ "grad_norm": 12.64527416229248,
289
+ "learning_rate": 7.09398870807041e-06,
290
+ "loss": 0.5734,
291
+ "step": 975
292
+ },
293
+ {
294
+ "epoch": 0.32,
295
+ "grad_norm": 9.928169250488281,
296
+ "learning_rate": 7.0109598140152775e-06,
297
+ "loss": 0.5845,
298
+ "step": 1000
299
+ },
300
+ {
301
+ "epoch": 0.32,
302
+ "eval_cer": 41.15215362048273,
303
+ "eval_loss": 0.5947259068489075,
304
+ "eval_runtime": 1807.1895,
305
+ "eval_samples_per_second": 2.455,
306
+ "eval_steps_per_second": 0.307,
307
+ "step": 1000
308
  }
309
  ],
310
  "logging_steps": 25,
 
312
  "num_input_tokens_seen": 0,
313
  "num_train_epochs": 1,
314
  "save_steps": 500,
315
+ "total_flos": 4.61736640512e+18,
316
  "train_batch_size": 16,
317
  "trial_name": null,
318
  "trial_params": null