ashanhr commited on
Commit
89ea76f
1 Parent(s): 83c6eb9

Training in progress, step 36700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:212fd85a8f0771f83e70fda3fe36ae1286fc6ca154640d937c6b46050c337f8f
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2e7d744e605fdeb47c357c118df95f001014156b76f00db93fae8270b36d782
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:705d5ee447293f9a70d2f66760196b2c46343eca0cdcd5524ec4f3b449b70b84
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0e0ccb6fbb91ed7c1319992741aa3473b384fd1962f0a2bdef33fd8f0424a5
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67b3b70c810545bf21d9ddf057aebcb724eb6e9a8a7e6c91016e8f0e815ec023
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2329d5a7fedc8f15c09d69541a87944d288c3f9c2684c5d81d7725abd374732
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a5b2fbf1a12d8205cee3894c54223b35a4a80e438d87a963917ccf699aca0e2
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:792ef05d47d8fddd57baa2e5e4c79c5f1008526305f0fc51b7e4b0ff90c07793
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:764ca9ef7fae34c6a8cba4f8df83adaaaca50ef1ca45f561d696e2b2507c6419
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28baba878695b471c12cd9b5814de5f1bb170226354cdd018df9f14201aec249
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.280993475057883,
5
  "eval_steps": 100,
6
- "global_step": 36300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5815,6 +5815,70 @@
5815
  "eval_samples_per_second": 25.394,
5816
  "eval_steps_per_second": 3.175,
5817
  "step": 36300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5818
  }
5819
  ],
5820
  "logging_steps": 100,
@@ -5822,7 +5886,7 @@
5822
  "num_input_tokens_seen": 0,
5823
  "num_train_epochs": 30,
5824
  "save_steps": 100,
5825
- "total_flos": 3.9789875911390396e+20,
5826
  "train_batch_size": 8,
5827
  "trial_name": null,
5828
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.449379078088823,
5
  "eval_steps": 100,
6
+ "global_step": 36700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5815
  "eval_samples_per_second": 25.394,
5816
  "eval_steps_per_second": 3.175,
5817
  "step": 36300
5818
+ },
5819
+ {
5820
+ "epoch": 15.32,
5821
+ "grad_norm": 4.485095024108887,
5822
+ "learning_rate": 2.4641696113074207e-05,
5823
+ "loss": 0.8412,
5824
+ "step": 36400
5825
+ },
5826
+ {
5827
+ "epoch": 15.32,
5828
+ "eval_cer": 0.3884747370067655,
5829
+ "eval_loss": 2.459603786468506,
5830
+ "eval_runtime": 390.2769,
5831
+ "eval_samples_per_second": 24.285,
5832
+ "eval_steps_per_second": 3.036,
5833
+ "step": 36400
5834
+ },
5835
+ {
5836
+ "epoch": 15.37,
5837
+ "grad_norm": 7.8216471672058105,
5838
+ "learning_rate": 2.4571024734982335e-05,
5839
+ "loss": 0.8567,
5840
+ "step": 36500
5841
+ },
5842
+ {
5843
+ "epoch": 15.37,
5844
+ "eval_cer": 0.3837354816002503,
5845
+ "eval_loss": 3.254591226577759,
5846
+ "eval_runtime": 377.582,
5847
+ "eval_samples_per_second": 25.102,
5848
+ "eval_steps_per_second": 3.138,
5849
+ "step": 36500
5850
+ },
5851
+ {
5852
+ "epoch": 15.41,
5853
+ "grad_norm": 2.479659080505371,
5854
+ "learning_rate": 2.450035335689046e-05,
5855
+ "loss": 0.8539,
5856
+ "step": 36600
5857
+ },
5858
+ {
5859
+ "epoch": 15.41,
5860
+ "eval_cer": 0.3853241953775762,
5861
+ "eval_loss": 1.766408085823059,
5862
+ "eval_runtime": 396.6796,
5863
+ "eval_samples_per_second": 23.893,
5864
+ "eval_steps_per_second": 2.987,
5865
+ "step": 36600
5866
+ },
5867
+ {
5868
+ "epoch": 15.45,
5869
+ "grad_norm": 3.3775389194488525,
5870
+ "learning_rate": 2.4429681978798587e-05,
5871
+ "loss": 0.8428,
5872
+ "step": 36700
5873
+ },
5874
+ {
5875
+ "epoch": 15.45,
5876
+ "eval_cer": 0.3814917289116577,
5877
+ "eval_loss": 1.7772597074508667,
5878
+ "eval_runtime": 373.2028,
5879
+ "eval_samples_per_second": 25.396,
5880
+ "eval_steps_per_second": 3.175,
5881
+ "step": 36700
5882
  }
5883
  ],
5884
  "logging_steps": 100,
 
5886
  "num_input_tokens_seen": 0,
5887
  "num_train_epochs": 30,
5888
  "save_steps": 100,
5889
+ "total_flos": 4.022627751818035e+20,
5890
  "train_batch_size": 8,
5891
  "trial_name": null,
5892
  "trial_params": null