ashanhr commited on
Commit
da9d0e7
·
verified ·
1 Parent(s): 3035146

Training in progress, step 48900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:919479edc9c44b709a6fbc48787e8e31ae9b413ae5d1bf037e00f15a35aa7618
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd4f68444eb86bbfd60685a66d682e2a4bcb950d07e58124c2888ef026741c77
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10995179ef6d151496a819060d002d0d14d0ae4946c8f3186f134f7b134c7b4c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e4ed37b5aa7caa4b3db7c781c0c0a436041060f568b10a355bc722544491e3e
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38b0edab5285069b45c657b2c1e8a54173d39533689345a0336372816e6ee99f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35803e2074649e47f1ae99ba302701281690a9b94739796e166947b76a2870de
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8905c27bc949ca26e4d080125b36f4b021cbb40b50e21f90195179c8522e0ada
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a52d490e29ec96c5632acb35c769c2e04ef5ac5a150bd5b4ec42317318d278
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5c0c262f0099d55672ec4b48865245d6b1a61a5275913739ab46d7d5cc0df15
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c04deeab2ce545af1dd2aa0d8b2c7079874a851a6d59f58491424b131cccbdc2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.50094716901705,
5
  "eval_steps": 100,
6
- "global_step": 48700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7799,6 +7799,38 @@
7799
  "eval_samples_per_second": 24.386,
7800
  "eval_steps_per_second": 3.049,
7801
  "step": 48700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7802
  }
7803
  ],
7804
  "logging_steps": 100,
@@ -7806,7 +7838,7 @@
7806
  "num_input_tokens_seen": 0,
7807
  "num_train_epochs": 30,
7808
  "save_steps": 100,
7809
- "total_flos": 5.336332584830473e+20,
7810
  "train_batch_size": 8,
7811
  "trial_name": null,
7812
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.58513997053252,
5
  "eval_steps": 100,
6
+ "global_step": 48900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7799
  "eval_samples_per_second": 24.386,
7800
  "eval_steps_per_second": 3.049,
7801
  "step": 48700
7802
+ },
7803
+ {
7804
+ "epoch": 20.54,
7805
+ "grad_norm": 5.763105392456055,
7806
+ "learning_rate": 1.588197879858657e-05,
7807
+ "loss": 0.4039,
7808
+ "step": 48800
7809
+ },
7810
+ {
7811
+ "epoch": 20.54,
7812
+ "eval_cer": 0.34577988737241405,
7813
+ "eval_loss": 2.2274067401885986,
7814
+ "eval_runtime": 406.2895,
7815
+ "eval_samples_per_second": 23.328,
7816
+ "eval_steps_per_second": 2.917,
7817
+ "step": 48800
7818
+ },
7819
+ {
7820
+ "epoch": 20.59,
7821
+ "grad_norm": 4.299105167388916,
7822
+ "learning_rate": 1.5811307420494702e-05,
7823
+ "loss": 0.4253,
7824
+ "step": 48900
7825
+ },
7826
+ {
7827
+ "epoch": 20.59,
7828
+ "eval_cer": 0.3451859528371984,
7829
+ "eval_loss": 3.0053601264953613,
7830
+ "eval_runtime": 386.4818,
7831
+ "eval_samples_per_second": 24.524,
7832
+ "eval_steps_per_second": 3.066,
7833
+ "step": 48900
7834
  }
7835
  ],
7836
  "logging_steps": 100,
 
7838
  "num_input_tokens_seen": 0,
7839
  "num_train_epochs": 30,
7840
  "save_steps": 100,
7841
+ "total_flos": 5.357917546821734e+20,
7842
  "train_batch_size": 8,
7843
  "trial_name": null,
7844
  "trial_params": null