ashanhr commited on
Commit
86b6356
1 Parent(s): 3002d67

Training in progress, step 30100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c65a4983271581f7832e9fd374dbc6b246da39cee2dfc60b53fa1d44b5bb1aa8
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29bf1bd1a29eb5633b481260ad7599097eaa5d7c51a43d3732ee4e625e08fcea
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e48477b3833582447d116b3875b936c6fbc79f219715a7548f995e773a3beb9
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8044684f43b677272561ea3d1bcd96015c8188001b76c77879238a204297d09
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2393a4407e77990cc384c3df22dabed19ea02921b1539d0c2226cdd68e80dbfc
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e73302063cce900d8c19d9ee33a5ebc45e0e1fff2192c0fc7fb1a987eeacedd
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a75caa93515fbe3b2dde54a5881684cb3634e004fc9e181a959f7cb2a3a1cf4
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb57aa75f1c95997314f28233ee0e0924d6206251b056b5e55ea31aff2195d0
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:140f760da868d14f720ef947f1dcef91d11508bf77dfffd336552f0c1d799f8d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed7796dc0dfe9ec23d4d64125b7d9eadefb157806b1e2efe4f7c35f3e825e361
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.58682382656283,
5
  "eval_steps": 100,
6
- "global_step": 29900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4791,6 +4791,38 @@
4791
  "eval_samples_per_second": 25.747,
4792
  "eval_steps_per_second": 3.219,
4793
  "step": 29900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4794
  }
4795
  ],
4796
  "logging_steps": 100,
@@ -4798,7 +4830,7 @@
4798
  "num_input_tokens_seen": 0,
4799
  "num_train_epochs": 30,
4800
  "save_steps": 100,
4801
- "total_flos": 3.276539803799841e+20,
4802
  "train_batch_size": 8,
4803
  "trial_name": null,
4804
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.671016628078299,
5
  "eval_steps": 100,
6
+ "global_step": 30100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4791
  "eval_samples_per_second": 25.747,
4792
  "eval_steps_per_second": 3.219,
4793
  "step": 29900
4794
+ },
4795
+ {
4796
+ "epoch": 12.63,
4797
+ "grad_norm": 5.468438148498535,
4798
+ "learning_rate": 2.916395759717315e-05,
4799
+ "loss": 1.3109,
4800
+ "step": 30000
4801
+ },
4802
+ {
4803
+ "epoch": 12.63,
4804
+ "eval_cer": 0.4083043291228345,
4805
+ "eval_loss": 2.8631815910339355,
4806
+ "eval_runtime": 397.7294,
4807
+ "eval_samples_per_second": 23.83,
4808
+ "eval_steps_per_second": 2.979,
4809
+ "step": 30000
4810
+ },
4811
+ {
4812
+ "epoch": 12.67,
4813
+ "grad_norm": 3.4033985137939453,
4814
+ "learning_rate": 2.9093286219081274e-05,
4815
+ "loss": 1.0724,
4816
+ "step": 30100
4817
+ },
4818
+ {
4819
+ "epoch": 12.67,
4820
+ "eval_cer": 0.4132855578585116,
4821
+ "eval_loss": 2.082000732421875,
4822
+ "eval_runtime": 370.8747,
4823
+ "eval_samples_per_second": 25.556,
4824
+ "eval_steps_per_second": 3.195,
4825
+ "step": 30100
4826
  }
4827
  ],
4828
  "logging_steps": 100,
 
4830
  "num_input_tokens_seen": 0,
4831
  "num_train_epochs": 30,
4832
  "save_steps": 100,
4833
+ "total_flos": 3.298248713256518e+20,
4834
  "train_batch_size": 8,
4835
  "trial_name": null,
4836
  "trial_params": null