ashanhr commited on
Commit
38b79fb
1 Parent(s): a67b93e

Training in progress, step 16900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3132aba995deb3e9a62f8586490ea152a8c2191d407f35d7797370ed0f468775
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87bc7c877cdfbe54847b102871e2d4073e1e3b3e77f4628403c62609d4215ee0
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38bfe3bf3a40f1e030bac1d38293804c951b5e71979c1aa88a546da71cd72ede
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa0adf8ce583fad0205dd992aeb22f4bb05366efc58cf6b0feae933acf7733a
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c761581dae884ee2dc52193ff57723855ada2efff7b89719e658ab91db8d56e1
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1057f87e50786d1aca543245abd8aa5218ae3920b5ed10328f1cc6e8d05b8fb8
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39ce27b6fe5c4a6a8ed11673dc9593b6a866d17de3e334d7e9d50d3687ae64fa
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70bc272cae15954849873b08592843a4166415a3756dbd7282f8116aed448377
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bb9f1f915324f76cd3b7d66524b9d86b094231ff2bf22c1a230837b1a765ae4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e32e74e7ac82af1d3f98e8ce057551c88fd1fff091ef003d7c5821593146b8
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.030098926541781,
5
  "eval_steps": 100,
6
- "global_step": 16700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2679,6 +2679,38 @@
2679
  "eval_samples_per_second": 26.016,
2680
  "eval_steps_per_second": 3.253,
2681
  "step": 16700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2682
  }
2683
  ],
2684
  "logging_steps": 100,
@@ -2686,7 +2718,7 @@
2686
  "num_input_tokens_seen": 0,
2687
  "num_train_epochs": 30,
2688
  "save_steps": 100,
2689
- "total_flos": 1.8305404207742734e+20,
2690
  "train_batch_size": 8,
2691
  "trial_name": null,
2692
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.1142917280572515,
5
  "eval_steps": 100,
6
+ "global_step": 16900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2679
  "eval_samples_per_second": 26.016,
2680
  "eval_steps_per_second": 3.253,
2681
  "step": 16700
2682
+ },
2683
+ {
2684
+ "epoch": 7.07,
2685
+ "grad_norm": 4.643332004547119,
2686
+ "learning_rate": 3.848833922261484e-05,
2687
+ "loss": 1.3365,
2688
+ "step": 16800
2689
+ },
2690
+ {
2691
+ "epoch": 7.07,
2692
+ "eval_cer": 0.46146024793711626,
2693
+ "eval_loss": 1.8595181703567505,
2694
+ "eval_runtime": 374.6805,
2695
+ "eval_samples_per_second": 25.296,
2696
+ "eval_steps_per_second": 3.163,
2697
+ "step": 16800
2698
+ },
2699
+ {
2700
+ "epoch": 7.11,
2701
+ "grad_norm": 2.433307409286499,
2702
+ "learning_rate": 3.841766784452297e-05,
2703
+ "loss": 1.6175,
2704
+ "step": 16900
2705
+ },
2706
+ {
2707
+ "epoch": 7.11,
2708
+ "eval_cer": 0.468870986664581,
2709
+ "eval_loss": 1.7079046964645386,
2710
+ "eval_runtime": 364.7345,
2711
+ "eval_samples_per_second": 25.986,
2712
+ "eval_steps_per_second": 3.249,
2713
+ "step": 16900
2714
  }
2715
  ],
2716
  "logging_steps": 100,
 
2718
  "num_input_tokens_seen": 0,
2719
  "num_train_epochs": 30,
2720
  "save_steps": 100,
2721
+ "total_flos": 1.8525968673735387e+20,
2722
  "train_batch_size": 8,
2723
  "trial_name": null,
2724
  "trial_params": null