ashanhr committed on
Commit
5e5934b
1 Parent(s): 3ad7491

Training in progress, step 2200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4e148dbf45ee0c1b00413faa78a1fe84e8ab9819c7f9e728e4c5e30519ee934
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00172f6dd05a49b1cf2f8ff023f3ca93980f8b4e3060eeb47e74d8f65933ac36
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8787bac1231fb5b7b2b3f2e31b1a1cc032f8441964b82d0bb566039e4f39228b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d59fdb186dd6719014f3267c673ca01c879f220542715dcb5862f4204903bc2
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:766eb030afa62f812221a4873fc80200f2130440660a4ab8893d5971174019e0
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe23e6ec83e38f888ce1b667257f959e02478dbaef89d9e23350f77615b295b
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38091ee0235dae3b62dd1e1fdf55b8c87b4cf4944d0b07d3a101719f8706e81c
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c98aca05a0d374052b7c1cb664ff3eb713201073b7a44e234eb536fbccee1c
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53944e7556a428dc9e5909eb4fd8f2f65cd084397466c50c8d395df612051b69
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1665cb8324f796c9f90a2705fd01fef5d8ec3fa89b088395f8b2aa4bae6228c2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8419280151547043,
5
  "eval_steps": 100,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -327,6 +327,38 @@
327
  "eval_samples_per_second": 27.767,
328
  "eval_steps_per_second": 3.472,
329
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  }
331
  ],
332
  "logging_steps": 100,
@@ -334,7 +366,7 @@
334
  "num_input_tokens_seen": 0,
335
  "num_train_epochs": 30,
336
  "save_steps": 100,
337
- "total_flos": 2.194770023027631e+19,
338
  "train_batch_size": 8,
339
  "trial_name": null,
340
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9261208166701747,
5
  "eval_steps": 100,
6
+ "global_step": 2200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
327
  "eval_samples_per_second": 27.767,
328
  "eval_steps_per_second": 3.472,
329
  "step": 2000
330
+ },
331
+ {
332
+ "epoch": 0.88,
333
+ "grad_norm": 2.842353343963623,
334
+ "learning_rate": 4.8873498233215547e-05,
335
+ "loss": 2.1649,
336
+ "step": 2100
337
+ },
338
+ {
339
+ "epoch": 0.88,
340
+ "eval_cer": 0.544757733369833,
341
+ "eval_loss": 2.421653985977173,
342
+ "eval_runtime": 358.7261,
343
+ "eval_samples_per_second": 26.421,
344
+ "eval_steps_per_second": 3.303,
345
+ "step": 2100
346
+ },
347
+ {
348
+ "epoch": 0.93,
349
+ "grad_norm": 4.818225383758545,
350
+ "learning_rate": 4.880282685512368e-05,
351
+ "loss": 2.2775,
352
+ "step": 2200
353
+ },
354
+ {
355
+ "epoch": 0.93,
356
+ "eval_cer": 0.5533832270931915,
357
+ "eval_loss": 2.188101291656494,
358
+ "eval_runtime": 354.4215,
359
+ "eval_samples_per_second": 26.742,
360
+ "eval_steps_per_second": 3.343,
361
+ "step": 2200
362
  }
363
  ],
364
  "logging_steps": 100,
 
366
  "num_input_tokens_seen": 0,
367
  "num_train_epochs": 30,
368
  "save_steps": 100,
369
+ "total_flos": 2.411832086421037e+19,
370
  "train_batch_size": 8,
371
  "trial_name": null,
372
  "trial_params": null