ashanhr commited on
Commit
7fa3980
1 Parent(s): 32005e8

Training in progress, step 48300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6167439d581cc395bf3088fe8ac6b11c26e4181fe551f128881e89bc4e1ce539
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1224fff0fd037aa14796a69b165995604c17ed87b445e25c006cd69e9b552d67
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4938e0f36333fadd4855c1934e42518090173b0a19d22feeaf6a23dda164827e
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1145bdd2c2468c43b77a6185bc82e494de1f8a3c4cf0ba296fdef749783fa409
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acfb4a34f27a6fb3c38894c96bbfd9bc1fd023b61822cee208fc0fb86da0a1c2
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2de1effdfae800e08200d6d6606f7de34095179fc4045eadea03d4d6c7d9fd10
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc98f239aaa8533c601f91ade3675472665d4acfce4d9364d5c7b75e44eee84
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d2803d972252df3ae1062a374992bcf7069240c26b2f467f5507d6c88b0e69c
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e4cd969c054daa76cafad7a5bcd9e1236149af3f397933e1dd3fba11510e9d1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b97e57d1bf31d08433b6e91c583a12e2591e8c3381bc5ab3dd2b0d64c5a237e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.248368764470637,
5
  "eval_steps": 100,
6
- "global_step": 48100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7703,6 +7703,38 @@
7703
  "eval_samples_per_second": 24.403,
7704
  "eval_steps_per_second": 3.051,
7705
  "step": 48100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7706
  }
7707
  ],
7708
  "logging_steps": 100,
@@ -7710,7 +7742,7 @@
7710
  "num_input_tokens_seen": 0,
7711
  "num_train_epochs": 30,
7712
  "save_steps": 100,
7713
- "total_flos": 5.270910176053505e+20,
7714
  "train_batch_size": 8,
7715
  "trial_name": null,
7716
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.332561565986108,
5
  "eval_steps": 100,
6
+ "global_step": 48300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7703
  "eval_samples_per_second": 24.403,
7704
  "eval_steps_per_second": 3.051,
7705
  "step": 48100
7706
+ },
7707
+ {
7708
+ "epoch": 20.29,
7709
+ "grad_norm": 2.476924180984497,
7710
+ "learning_rate": 1.630600706713781e-05,
7711
+ "loss": 0.4173,
7712
+ "step": 48200
7713
+ },
7714
+ {
7715
+ "epoch": 20.29,
7716
+ "eval_cer": 0.3475372492276407,
7717
+ "eval_loss": 2.8535077571868896,
7718
+ "eval_runtime": 407.6688,
7719
+ "eval_samples_per_second": 23.249,
7720
+ "eval_steps_per_second": 2.907,
7721
+ "step": 48200
7722
+ },
7723
+ {
7724
+ "epoch": 20.33,
7725
+ "grad_norm": 2.1427981853485107,
7726
+ "learning_rate": 1.623533568904594e-05,
7727
+ "loss": 0.4149,
7728
+ "step": 48300
7729
+ },
7730
+ {
7731
+ "epoch": 20.33,
7732
+ "eval_cer": 0.3440518556176919,
7733
+ "eval_loss": 2.9584333896636963,
7734
+ "eval_runtime": 385.1561,
7735
+ "eval_samples_per_second": 24.608,
7736
+ "eval_steps_per_second": 3.077,
7737
+ "step": 48300
7738
  }
7739
  ],
7740
  "logging_steps": 100,
 
7742
  "num_input_tokens_seen": 0,
7743
  "num_train_epochs": 30,
7744
  "save_steps": 100,
7745
+ "total_flos": 5.292837083424306e+20,
7746
  "train_batch_size": 8,
7747
  "trial_name": null,
7748
  "trial_params": null