ashanhr commited on
Commit
f945e37
1 Parent(s): 113444b

Training in progress, step 56100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5350a3d670e63e0e724e9a948f10fee13c3e2f281c6e2cbb34e1e8d199bcddd6
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab4065c03764effc8670cef485e7172138b847944296ddc1d4ac63592bb57b3
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c30ad926c245886d2d7b37d4b9f136480894a256273517a47903a260a6350a0f
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18c6c84b58be65267870df3ab069b46c0ec0719004b480fb2a5a41f9b2c0d5b5
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dc2cf3451e42ae8925d9cbf46fea6e10421c2853d0ff857ca4521a7443ec2ed
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83b49b95053c311733c21b0f1a9bbdd3669c30e7fc3caf207b5862e66e32be6c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffe56cb4545356428f0c754a6c3b227de992c60c335c1b79bf625d3be68349b6
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00cb33b6fce6dba04a33d05c1cb2d3d6d099ac011324c13a509e8848d1669b14
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86566b254e3e5928bcfd37fb46ba4a9e0762d9834543bb6f29fbaab70f97de5d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1305d66ef8f2afa8554deae5dd51afcd829f2485cde50c618b4fe4df3ce6276
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.531888023573984,
5
  "eval_steps": 100,
6
- "global_step": 55900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8951,6 +8951,38 @@
8951
  "eval_samples_per_second": 23.838,
8952
  "eval_steps_per_second": 2.98,
8953
  "step": 55900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8954
  }
8955
  ],
8956
  "logging_steps": 100,
@@ -8958,7 +8990,7 @@
8958
  "num_input_tokens_seen": 0,
8959
  "num_train_epochs": 30,
8960
  "save_steps": 100,
8961
- "total_flos": 6.12582004081098e+20,
8962
  "train_batch_size": 8,
8963
  "trial_name": null,
8964
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.616080825089455,
5
  "eval_steps": 100,
6
+ "global_step": 56100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8951
  "eval_samples_per_second": 23.838,
8952
  "eval_steps_per_second": 2.98,
8953
  "step": 55900
8954
+ },
8955
+ {
8956
+ "epoch": 23.57,
8957
+ "grad_norm": 3.1888105869293213,
8958
+ "learning_rate": 1.0794346289752651e-05,
8959
+ "loss": 0.2537,
8960
+ "step": 56000
8961
+ },
8962
+ {
8963
+ "epoch": 23.57,
8964
+ "eval_cer": 0.3346197841304603,
8965
+ "eval_loss": 2.708544969558716,
8966
+ "eval_runtime": 427.2487,
8967
+ "eval_samples_per_second": 22.184,
8968
+ "eval_steps_per_second": 2.774,
8969
+ "step": 56000
8970
+ },
8971
+ {
8972
+ "epoch": 23.62,
8973
+ "grad_norm": 9.785385131835938,
8974
+ "learning_rate": 1.0723674911660778e-05,
8975
+ "loss": 0.2622,
8976
+ "step": 56100
8977
+ },
8978
+ {
8979
+ "epoch": 23.62,
8980
+ "eval_cer": 0.3341309491220523,
8981
+ "eval_loss": 2.625296115875244,
8982
+ "eval_runtime": 401.2011,
8983
+ "eval_samples_per_second": 23.624,
8984
+ "eval_steps_per_second": 2.954,
8985
+ "step": 56100
8986
  }
8987
  ],
8988
  "logging_steps": 100,
 
8990
  "num_input_tokens_seen": 0,
8991
  "num_train_epochs": 30,
8992
  "save_steps": 100,
8993
+ "total_flos": 6.14754805865386e+20,
8994
  "train_batch_size": 8,
8995
  "trial_name": null,
8996
  "trial_params": null