ashanhr commited on
Commit
a0ceaa4
·
verified ·
1 Parent(s): 2219079

Training in progress, step 48500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1224fff0fd037aa14796a69b165995604c17ed87b445e25c006cd69e9b552d67
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1672c0a76833a5b5822154c7fa186402def49a98de876f28ea3494cad8f8888d
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1145bdd2c2468c43b77a6185bc82e494de1f8a3c4cf0ba296fdef749783fa409
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36beb4b37fe86763115810c74e72bd6beb673e075cd6a49cc0bce66f1564a42e
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2de1effdfae800e08200d6d6606f7de34095179fc4045eadea03d4d6c7d9fd10
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa6ee9fb6e8b8ff058d8531424821cbaa6688b19e17750aa45dece30fa41780
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d2803d972252df3ae1062a374992bcf7069240c26b2f467f5507d6c88b0e69c
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1366c26f46b0efee6d4bad587c69654e431d12bbbd1ef637f411a48b4d281d1
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b97e57d1bf31d08433b6e91c583a12e2591e8c3381bc5ab3dd2b0d64c5a237e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e98968c8be08f3c058353eaf3e9c13f6c3ef943aba36fc721f7302bf66d7b2a7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.332561565986108,
5
  "eval_steps": 100,
6
- "global_step": 48300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7735,6 +7735,38 @@
7735
  "eval_samples_per_second": 24.608,
7736
  "eval_steps_per_second": 3.077,
7737
  "step": 48300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7738
  }
7739
  ],
7740
  "logging_steps": 100,
@@ -7742,7 +7774,7 @@
7742
  "num_input_tokens_seen": 0,
7743
  "num_train_epochs": 30,
7744
  "save_steps": 100,
7745
- "total_flos": 5.292837083424306e+20,
7746
  "train_batch_size": 8,
7747
  "trial_name": null,
7748
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.41675436750158,
5
  "eval_steps": 100,
6
+ "global_step": 48500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7735
  "eval_samples_per_second": 24.608,
7736
  "eval_steps_per_second": 3.077,
7737
  "step": 48300
7738
+ },
7739
+ {
7740
+ "epoch": 20.37,
7741
+ "grad_norm": 6.375877857208252,
7742
+ "learning_rate": 1.6164664310954065e-05,
7743
+ "loss": 0.4005,
7744
+ "step": 48400
7745
+ },
7746
+ {
7747
+ "epoch": 20.37,
7748
+ "eval_cer": 0.3466133510617496,
7749
+ "eval_loss": 3.129476547241211,
7750
+ "eval_runtime": 408.2033,
7751
+ "eval_samples_per_second": 23.219,
7752
+ "eval_steps_per_second": 2.903,
7753
+ "step": 48400
7754
+ },
7755
+ {
7756
+ "epoch": 20.42,
7757
+ "grad_norm": 8.823423385620117,
7758
+ "learning_rate": 1.6093992932862193e-05,
7759
+ "loss": 0.3974,
7760
+ "step": 48500
7761
+ },
7762
+ {
7763
+ "epoch": 20.42,
7764
+ "eval_cer": 0.3472977200735208,
7765
+ "eval_loss": 2.675215482711792,
7766
+ "eval_runtime": 389.4912,
7767
+ "eval_samples_per_second": 24.334,
7768
+ "eval_steps_per_second": 3.042,
7769
+ "step": 48500
7770
  }
7771
  ],
7772
  "logging_steps": 100,
 
7774
  "num_input_tokens_seen": 0,
7775
  "num_train_epochs": 30,
7776
  "save_steps": 100,
7777
+ "total_flos": 5.314599801292227e+20,
7778
  "train_batch_size": 8,
7779
  "trial_name": null,
7780
  "trial_params": null