ashanhr commited on
Commit
3dd4caf
·
verified ·
1 Parent(s): ebbe776

Training in progress, step 37100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0646fcf5cf97fb5503e2fbbc9e98c3960fec3e656e2c994965054eac8467df2b
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381f57f24d93696513b5b64ff4e2b26e4aa55b4c5527b3fea9b18b1fc4cf72bc
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c691890dc2b6f3211ced073de26ccb0d7c08362c3a77fadc55959c2aea6190dd
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb33ab1b771a570de1383d0627b9c859532e9fc93dbf7d6c5f3ea94a017d8931
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65fd8a1c0a883c5d4e5251be70dcd09108e90ba7f22053441648adf6f980589f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dede614aaa93a8236b1aaee0b270dbb1e0e0ee731df570c565b17414017d0f04
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86d17156c6a1f8e32f040e09547b8bc921e30af508cd3be63ca008523dbabbb3
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:953d28bb26d9f06c74ed4ce0f50682b88b7c1a740875d4c88a1a0b092ef89394
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc136ee3addae9eeb904fb18be82d28a7b235388655ca82f3c8bea405623e110
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bbb7a0763de581151b4ca9c96854f5bc48332bf11f33793e4cc2d52e736e678
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.533571879604294,
5
  "eval_steps": 100,
6
- "global_step": 36900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5911,6 +5911,38 @@
5911
  "eval_samples_per_second": 25.191,
5912
  "eval_steps_per_second": 3.15,
5913
  "step": 36900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5914
  }
5915
  ],
5916
  "logging_steps": 100,
@@ -5918,7 +5950,7 @@
5918
  "num_input_tokens_seen": 0,
5919
  "num_train_epochs": 30,
5920
  "save_steps": 100,
5921
- "total_flos": 4.044598170257006e+20,
5922
  "train_batch_size": 8,
5923
  "trial_name": null,
5924
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.617764681119764,
5
  "eval_steps": 100,
6
+ "global_step": 37100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5911
  "eval_samples_per_second": 25.191,
5912
  "eval_steps_per_second": 3.15,
5913
  "step": 36900
5914
+ },
5915
+ {
5916
+ "epoch": 15.58,
5917
+ "grad_norm": 5.990358352661133,
5918
+ "learning_rate": 2.4217667844522972e-05,
5919
+ "loss": 0.8376,
5920
+ "step": 37000
5921
+ },
5922
+ {
5923
+ "epoch": 15.58,
5924
+ "eval_cer": 0.38022809041492317,
5925
+ "eval_loss": 1.795945405960083,
5926
+ "eval_runtime": 393.6062,
5927
+ "eval_samples_per_second": 24.08,
5928
+ "eval_steps_per_second": 3.011,
5929
+ "step": 37000
5930
+ },
5931
+ {
5932
+ "epoch": 15.62,
5933
+ "grad_norm": 37.267982482910156,
5934
+ "learning_rate": 2.4146996466431096e-05,
5935
+ "loss": 0.8422,
5936
+ "step": 37100
5937
+ },
5938
+ {
5939
+ "epoch": 15.62,
5940
+ "eval_cer": 0.38063137929685975,
5941
+ "eval_loss": 2.6164886951446533,
5942
+ "eval_runtime": 378.7126,
5943
+ "eval_samples_per_second": 25.027,
5944
+ "eval_steps_per_second": 3.129,
5945
+ "step": 37100
5946
  }
5947
  ],
5948
  "logging_steps": 100,
 
5950
  "num_input_tokens_seen": 0,
5951
  "num_train_epochs": 30,
5952
  "save_steps": 100,
5953
+ "total_flos": 4.066405780809435e+20,
5954
  "train_batch_size": 8,
5955
  "trial_name": null,
5956
  "trial_params": null