ashanhr commited on
Commit
fa6d68c
·
verified ·
1 Parent(s): cb43d9a

Training in progress, step 6200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43d64164218ceb70df3188e5d55bcfa4289cb63f7182305fcc19b63af2b99185
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a863722ec1d87aec7fd9e98fc57ce968157fd4527814212a9254f9bf4b2eccd
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0c5f7365e986a4df25beb4c71b990dd0017fde5984b8e75111d186387f61b95
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cb744f0d5f666422c5a7f99a1d8df94750f263c7d24426930046bab63c6d24b
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc61d44fe8afd95895c257c0e8778313394a3453a26e03b583759623c7260b89
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eeab5a3fec411dde41e1e4da5efdaacfe0816dd24edffca8142d79b058cfe80
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f69fe8240e55c5995426573757e5623029e976dcfdb9639d983a89e4307225f9
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c02d15367d3e2c48c0709cb18e5d948989b3d743d9405d0ec8c31cf19092d33c
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fe5c154bf4966b68a2d37446afad8d6e4131f9298c56c1acc1015e0b966ad66
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:491f92ba9ce65f06837d35b0bf7a55395d7ef01dd1adf1e1cc66473509666736
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5257840454641127,
5
  "eval_steps": 100,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -967,6 +967,38 @@
967
  "eval_samples_per_second": 26.063,
968
  "eval_steps_per_second": 3.259,
969
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
970
  }
971
  ],
972
  "logging_steps": 100,
@@ -974,7 +1006,7 @@
974
  "num_input_tokens_seen": 0,
975
  "num_train_epochs": 30,
976
  "save_steps": 100,
977
- "total_flos": 6.577336385232916e+19,
978
  "train_batch_size": 8,
979
  "trial_name": null,
980
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.609976846979583,
5
  "eval_steps": 100,
6
+ "global_step": 6200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
967
  "eval_samples_per_second": 26.063,
968
  "eval_steps_per_second": 3.259,
969
  "step": 6000
970
+ },
971
+ {
972
+ "epoch": 2.57,
973
+ "grad_norm": 3.353976249694824,
974
+ "learning_rate": 4.604805653710248e-05,
975
+ "loss": 1.7398,
976
+ "step": 6100
977
+ },
978
+ {
979
+ "epoch": 2.57,
980
+ "eval_cer": 0.5255465175394001,
981
+ "eval_loss": 2.6437551975250244,
982
+ "eval_runtime": 380.4559,
983
+ "eval_samples_per_second": 24.912,
984
+ "eval_steps_per_second": 3.115,
985
+ "step": 6100
986
+ },
987
+ {
988
+ "epoch": 2.61,
989
+ "grad_norm": 2.671917676925659,
990
+ "learning_rate": 4.5978091872791526e-05,
991
+ "loss": 2.2359,
992
+ "step": 6200
993
+ },
994
+ {
995
+ "epoch": 2.61,
996
+ "eval_cer": 0.537452094169176,
997
+ "eval_loss": 3.4509007930755615,
998
+ "eval_runtime": 364.3511,
999
+ "eval_samples_per_second": 26.013,
1000
+ "eval_steps_per_second": 3.252,
1001
+ "step": 6200
1002
  }
1003
  ],
1004
  "logging_steps": 100,
 
1006
  "num_input_tokens_seen": 0,
1007
  "num_train_epochs": 30,
1008
  "save_steps": 100,
1009
+ "total_flos": 6.794282177267883e+19,
1010
  "train_batch_size": 8,
1011
  "trial_name": null,
1012
  "trial_params": null