ashanhr committed on
Commit
1acd544
·
verified ·
1 Parent(s): a4663a4

Training in progress, step 55900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:785057a17e70302c21c007e0ef0b0f57a36cfb96718c31c8f395b008e214458d
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5350a3d670e63e0e724e9a948f10fee13c3e2f281c6e2cbb34e1e8d199bcddd6
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19609ff9aeb7b6551ed81c17110b29191a5d741f3df4560f9d420d2d58bce8b9
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30ad926c245886d2d7b37d4b9f136480894a256273517a47903a260a6350a0f
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:293accb5c9dbd84af2034bf699ee6c8b6fc3c17f88796b7449387a733eaff90c
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dc2cf3451e42ae8925d9cbf46fea6e10421c2853d0ff857ca4521a7443ec2ed
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cde67baa4dd7f0279761a9f5a42adbc49a7e57ab6ad2ad6773f0f379f28031b
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe56cb4545356428f0c754a6c3b227de992c60c335c1b79bf625d3be68349b6
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d431db1e058e7753d522d0aa651c6ef43b7fbfb259f3bade675c44750e1cb84
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86566b254e3e5928bcfd37fb46ba4a9e0762d9834543bb6f29fbaab70f97de5d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.447695222058513,
5
  "eval_steps": 100,
6
- "global_step": 55700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8919,6 +8919,38 @@
8919
  "eval_samples_per_second": 23.49,
8920
  "eval_steps_per_second": 2.937,
8921
  "step": 55700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8922
  }
8923
  ],
8924
  "logging_steps": 100,
@@ -8926,7 +8958,7 @@
8926
  "num_input_tokens_seen": 0,
8927
  "num_train_epochs": 30,
8928
  "save_steps": 100,
8929
- "total_flos": 6.103646459472622e+20,
8930
  "train_batch_size": 8,
8931
  "trial_name": null,
8932
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.531888023573984,
5
  "eval_steps": 100,
6
+ "global_step": 55900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8919
  "eval_samples_per_second": 23.49,
8920
  "eval_steps_per_second": 2.937,
8921
  "step": 55700
8922
+ },
8923
+ {
8924
+ "epoch": 23.49,
8925
+ "grad_norm": 1.8252880573272705,
8926
+ "learning_rate": 1.0935689045936397e-05,
8927
+ "loss": 0.2634,
8928
+ "step": 55800
8929
+ },
8930
+ {
8931
+ "epoch": 23.49,
8932
+ "eval_cer": 0.3340087403699503,
8933
+ "eval_loss": 2.7159037590026855,
8934
+ "eval_runtime": 426.7576,
8935
+ "eval_samples_per_second": 22.209,
8936
+ "eval_steps_per_second": 2.777,
8937
+ "step": 55800
8938
+ },
8939
+ {
8940
+ "epoch": 23.53,
8941
+ "grad_norm": 1.250098466873169,
8942
+ "learning_rate": 1.0865017667844523e-05,
8943
+ "loss": 0.2592,
8944
+ "step": 55900
8945
+ },
8946
+ {
8947
+ "epoch": 23.53,
8948
+ "eval_cer": 0.3338596456923859,
8949
+ "eval_loss": 2.6912946701049805,
8950
+ "eval_runtime": 397.5959,
8951
+ "eval_samples_per_second": 23.838,
8952
+ "eval_steps_per_second": 2.98,
8953
+ "step": 55900
8954
  }
8955
  ],
8956
  "logging_steps": 100,
 
8958
  "num_input_tokens_seen": 0,
8959
  "num_train_epochs": 30,
8960
  "save_steps": 100,
8961
+ "total_flos": 6.12582004081098e+20,
8962
  "train_batch_size": 8,
8963
  "trial_name": null,
8964
  "trial_params": null