ashanhr commited on
Commit
90cd41d
·
verified ·
1 Parent(s): fe614ef

Training in progress, step 24100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:534d2ec58806cf0406b243b3a40b55aced31f1221f35942206aebf1011f8eb45
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d1d86a67812950b8066ed1bd4785f9e5f74864e9e185c3c7d65a737e73412eb
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2de9be3becc541ed69b75b47fcb52e55d337b4bb44dd3461d867a50ece8370f
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9bb1ef4ed06294b4512beffd9b8417f18d9d6c0bf30582a61d1b2418d2d7ea6
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02a3ca665b49a6ee94542cba4571576580ed40077cbe8f242c5e2c4a3a9715e9
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2964d4840b876fd5ea402f55cbb3d3ecf1ccd015054120c9986b8308a80fe3b6
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4c012312b5f595ff9a144d4688b864116f1aa4fd2b3b74ba5004f78c1e3dd36
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29deb45594f2ffe1b33e01ccb1b744191d09d4b58ab92a56ff666176b2e013c6
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8afa21305abd66240d65fd4f9e149e423e42ae4f4d5f37065fa94d5df4b4f66
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed638b60d93524052d154552e2b26184c00d0eedea3ca18e5c2db2c0ab950927
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.061039781098716,
5
  "eval_steps": 100,
6
- "global_step": 23900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3831,6 +3831,38 @@
3831
  "eval_samples_per_second": 25.853,
3832
  "eval_steps_per_second": 3.232,
3833
  "step": 23900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3834
  }
3835
  ],
3836
  "logging_steps": 100,
@@ -3838,7 +3870,7 @@
3838
  "num_input_tokens_seen": 0,
3839
  "num_train_epochs": 30,
3840
  "save_steps": 100,
3841
- "total_flos": 2.6188595514552674e+20,
3842
  "train_batch_size": 8,
3843
  "trial_name": null,
3844
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.145232582614186,
5
  "eval_steps": 100,
6
+ "global_step": 24100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3831
  "eval_samples_per_second": 25.853,
3832
  "eval_steps_per_second": 3.232,
3833
  "step": 23900
3834
+ },
3835
+ {
3836
+ "epoch": 10.1,
3837
+ "grad_norm": 2.430925130844116,
3838
+ "learning_rate": 3.340212014134276e-05,
3839
+ "loss": 1.185,
3840
+ "step": 24000
3841
+ },
3842
+ {
3843
+ "epoch": 10.1,
3844
+ "eval_cer": 0.4357133080442689,
3845
+ "eval_loss": 1.5434983968734741,
3846
+ "eval_runtime": 385.0716,
3847
+ "eval_samples_per_second": 24.614,
3848
+ "eval_steps_per_second": 3.077,
3849
+ "step": 24000
3850
+ },
3851
+ {
3852
+ "epoch": 10.15,
3853
+ "grad_norm": 2.367770195007324,
3854
+ "learning_rate": 3.333144876325088e-05,
3855
+ "loss": 1.2225,
3856
+ "step": 24100
3857
+ },
3858
+ {
3859
+ "epoch": 10.15,
3860
+ "eval_cer": 0.4268898361425052,
3861
+ "eval_loss": 1.602295160293579,
3862
+ "eval_runtime": 368.0631,
3863
+ "eval_samples_per_second": 25.751,
3864
+ "eval_steps_per_second": 3.22,
3865
+ "step": 24100
3866
  }
3867
  ],
3868
  "logging_steps": 100,
 
3870
  "num_input_tokens_seen": 0,
3871
  "num_train_epochs": 30,
3872
  "save_steps": 100,
3873
+ "total_flos": 2.6405292408352522e+20,
3874
  "train_batch_size": 8,
3875
  "trial_name": null,
3876
  "trial_params": null