ashanhr committed on
Commit
b504abe
·
verified ·
1 Parent(s): bd945a2

Training in progress, step 56500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1932436298b8fb538e4cf80bed7163defd092b6328eb4bbc212e7fb308846d2
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37b48cfd98f64c5d125f771e54c20395665d1d877037bac06b3572191009699e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc9f7d35cf87748eddf9bf58f61c5ba700e389c31489d78a290697c06c313346
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44acee1aeb254e47b183795a7e974bb04e859045fbcb8a60cd68541f1f93e9ee
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ec88f79f8a5c3ad18d543cea65ddcfd2fe9aa517c23d778574b571f8236405d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2dee411cbfa983ddb90c55ee2a72b7fe583cabda1a874be6896ce43b84a43d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a56a788fea959cad8157c6c6dc5342a179f5cfb4ced637d221d4c0e4c02d4c3c
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2921f22486aac351c57cbb7c2191104e5b51ae494fc326c3d7293b77f62dc6
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b6a938385f789102516706256ee22f9b98b3e9613d4f67c8346e35dc285e21c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25166827dfb1f80c901f38f580504a29f3bfd9dd8b2e815ea7c1992907c058ce
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.700273626604925,
5
  "eval_steps": 100,
6
- "global_step": 56300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9015,6 +9015,38 @@
9015
  "eval_samples_per_second": 23.556,
9016
  "eval_steps_per_second": 2.945,
9017
  "step": 56300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9018
  }
9019
  ],
9020
  "logging_steps": 100,
@@ -9022,7 +9054,7 @@
9022
  "num_input_tokens_seen": 0,
9023
  "num_train_epochs": 30,
9024
  "save_steps": 100,
9025
- "total_flos": 6.169297958480331e+20,
9026
  "train_batch_size": 8,
9027
  "trial_name": null,
9028
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.784466428120396,
5
  "eval_steps": 100,
6
+ "global_step": 56500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9015
  "eval_samples_per_second": 23.556,
9016
  "eval_steps_per_second": 2.945,
9017
  "step": 56300
9018
+ },
9019
+ {
9020
+ "epoch": 23.74,
9021
+ "grad_norm": 2.900815725326538,
9022
+ "learning_rate": 1.051166077738516e-05,
9023
+ "loss": 0.2671,
9024
+ "step": 56400
9025
+ },
9026
+ {
9027
+ "epoch": 23.74,
9028
+ "eval_cer": 0.33364211411364436,
9029
+ "eval_loss": 2.6333584785461426,
9030
+ "eval_runtime": 425.9033,
9031
+ "eval_samples_per_second": 22.254,
9032
+ "eval_steps_per_second": 2.782,
9033
+ "step": 56400
9034
+ },
9035
+ {
9036
+ "epoch": 23.78,
9037
+ "grad_norm": 1.366709589958191,
9038
+ "learning_rate": 1.0440989399293287e-05,
9039
+ "loss": 0.2509,
9040
+ "step": 56500
9041
+ },
9042
+ {
9043
+ "epoch": 23.78,
9044
+ "eval_cer": 0.3335125728364163,
9045
+ "eval_loss": 2.557394504547119,
9046
+ "eval_runtime": 402.0125,
9047
+ "eval_samples_per_second": 23.576,
9048
+ "eval_steps_per_second": 2.948,
9049
+ "step": 56500
9050
  }
9051
  ],
9052
  "logging_steps": 100,
 
9054
  "num_input_tokens_seen": 0,
9055
  "num_train_epochs": 30,
9056
  "save_steps": 100,
9057
+ "total_flos": 6.191168025761128e+20,
9058
  "train_batch_size": 8,
9059
  "trial_name": null,
9060
  "trial_params": null