ashanhr commited on
Commit
db7853e
·
verified ·
1 Parent(s): bf102be

Training in progress, step 4800, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a389c3f77e972b9ad2c3598a897a595d740c11eef3134fc3913b671d5c4aa98
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:055979e5e4bab7fa9c297e62d4dd024246a4b92488281ca6ea9d09b258b2dbed
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cf523156e6cfba18fe652208aa23b347ae9fa03eb5a27cf6ee2de9b87e0b018
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5fd6e75335a40460c09860fda74c9a6c5d024c71c570c9f8edaef3d82e2c4b6
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:972ad41e162646ea15a0a97c616a6a668482e5a2db3e8f5673ee3c315481e243
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:debc4e9a9196125845f12a1727f21f7a7bafd177d8aed8ac1735fdd1c7612955
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0965352267660a565504fd8fd4be8deea39448c1829b29947da55924c359a90b
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ca4a69432f7601c2ee6fa63216c3206d700ac81d698c937691e7b4a79a3cac
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd64d58f0ca1f8e908f6d74c38fec76a5c8bd2eba7e4b0c0c52260244cc0f23b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3ec477801d437578c540c89110c8a9e9fd36ab687066f3be220e65fc1f6809
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9364344348558198,
5
  "eval_steps": 100,
6
- "global_step": 4600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -743,6 +743,38 @@
743
  "eval_samples_per_second": 27.885,
744
  "eval_steps_per_second": 3.486,
745
  "step": 4600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  }
747
  ],
748
  "logging_steps": 100,
@@ -750,7 +782,7 @@
750
  "num_input_tokens_seen": 0,
751
  "num_train_epochs": 30,
752
  "save_steps": 100,
753
- "total_flos": 5.048556716553413e+19,
754
  "train_batch_size": 8,
755
  "trial_name": null,
756
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0206272363712903,
5
  "eval_steps": 100,
6
+ "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
743
  "eval_samples_per_second": 27.885,
744
  "eval_steps_per_second": 3.486,
745
  "step": 4600
746
+ },
747
+ {
748
+ "epoch": 1.98,
749
+ "grad_norm": 3.5580544471740723,
750
+ "learning_rate": 4.703745583038869e-05,
751
+ "loss": 3.577,
752
+ "step": 4700
753
+ },
754
+ {
755
+ "epoch": 1.98,
756
+ "eval_cer": 0.5378284971256502,
757
+ "eval_loss": 2.777487277984619,
758
+ "eval_runtime": 356.1415,
759
+ "eval_samples_per_second": 26.613,
760
+ "eval_steps_per_second": 3.327,
761
+ "step": 4700
762
+ },
763
+ {
764
+ "epoch": 2.02,
765
+ "grad_norm": 2.3930351734161377,
766
+ "learning_rate": 4.6966784452296826e-05,
767
+ "loss": 2.733,
768
+ "step": 4800
769
+ },
770
+ {
771
+ "epoch": 2.02,
772
+ "eval_cer": 0.5305424113253294,
773
+ "eval_loss": 3.2367630004882812,
774
+ "eval_runtime": 346.0446,
775
+ "eval_samples_per_second": 27.39,
776
+ "eval_steps_per_second": 3.424,
777
+ "step": 4800
778
  }
779
  ],
780
  "logging_steps": 100,
 
782
  "num_input_tokens_seen": 0,
783
  "num_train_epochs": 30,
784
  "save_steps": 100,
785
+ "total_flos": 5.262746619312e+19,
786
  "train_batch_size": 8,
787
  "trial_name": null,
788
  "trial_params": null