ashanhr commited on
Commit
8e87429
1 Parent(s): 86c06b7

Training in progress, step 5200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41875e86984f9c499bbd46447c4b07ffb7cbdf07789f652a89a6868d23bab35f
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:526f37a1fd8614e33d688ca0c95a0b4bdb2270b3c6287907514481f46e211e72
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf6fc30af65fb1bb2b342a3d768a007f7604ee05fb1696eb8530da85555ef0b5
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce64a952a4cce525f3a0cb0ed6d5e65ba07369000da2ff59f3ff55e450ce479
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc85d1a0b008548cc1a644ec52e7a922d0f2f1c680c67a89f68ab5e2f58767a5
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c379ec78bc415dd3c67bb2c638a8e23762a3c06fee68b3ab33b1391af723912b
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a56c4b251eaaa521dd5897d38bb3df55dc9d33c44687cea24d832bf76a4766d
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:268b899628f3fbd3f8e86b05216b622b8e7edbffdb48136b8a16736e8c90ff48
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57ebf369faef4240020a647139b481dc6228228676411a7d695726a9d3bf18a5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:784d3c121c9f359553146d9fe14f9140da42065b084f4fa423f15dfc626b4e3b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.1048200378867605,
5
  "eval_steps": 100,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -807,6 +807,38 @@
807
  "eval_samples_per_second": 26.983,
808
  "eval_steps_per_second": 3.374,
809
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
810
  }
811
  ],
812
  "logging_steps": 100,
@@ -814,7 +846,7 @@
814
  "num_input_tokens_seen": 0,
815
  "num_train_epochs": 30,
816
  "save_steps": 100,
817
- "total_flos": 5.4841600382755045e+19,
818
  "train_batch_size": 8,
819
  "trial_name": null,
820
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.1890128394022312,
5
  "eval_steps": 100,
6
+ "global_step": 5200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
807
  "eval_samples_per_second": 26.983,
808
  "eval_steps_per_second": 3.374,
809
  "step": 5000
810
+ },
811
+ {
812
+ "epoch": 2.15,
813
+ "grad_norm": 5.002595901489258,
814
+ "learning_rate": 4.67547703180212e-05,
815
+ "loss": 2.0594,
816
+ "step": 5100
817
+ },
818
+ {
819
+ "epoch": 2.15,
820
+ "eval_cer": 0.5161682179030933,
821
+ "eval_loss": 2.4667067527770996,
822
+ "eval_runtime": 377.3711,
823
+ "eval_samples_per_second": 25.116,
824
+ "eval_steps_per_second": 3.14,
825
+ "step": 5100
826
+ },
827
+ {
828
+ "epoch": 2.19,
829
+ "grad_norm": 3.2300162315368652,
830
+ "learning_rate": 4.668409893992933e-05,
831
+ "loss": 1.8512,
832
+ "step": 5200
833
+ },
834
+ {
835
+ "epoch": 2.19,
836
+ "eval_cer": 0.5493185639982793,
837
+ "eval_loss": 1.743632435798645,
838
+ "eval_runtime": 361.1881,
839
+ "eval_samples_per_second": 26.241,
840
+ "eval_steps_per_second": 3.281,
841
+ "step": 5200
842
  }
843
  ],
844
  "logging_steps": 100,
 
846
  "num_input_tokens_seen": 0,
847
  "num_train_epochs": 30,
848
  "save_steps": 100,
849
+ "total_flos": 5.703634893009586e+19,
850
  "train_batch_size": 8,
851
  "trial_name": null,
852
  "trial_params": null