ashanhr committed on
Commit
302b32c
1 Parent(s): 1d1007e

Training in progress, step 6400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a863722ec1d87aec7fd9e98fc57ce968157fd4527814212a9254f9bf4b2eccd
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac7d16d29dc7622f01e2d28eecb7e0add759d0f9d8043d96d64fde190f2fa5ad
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cb744f0d5f666422c5a7f99a1d8df94750f263c7d24426930046bab63c6d24b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af8744b4590c2dc3360130a9d9a624f707d729192543e3c10acf72e8b1c6050
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7eeab5a3fec411dde41e1e4da5efdaacfe0816dd24edffca8142d79b058cfe80
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2eb66119faf4da4efd1d509bddf53a23b7f025570fd8de5b78afa3bd1309aa
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c02d15367d3e2c48c0709cb18e5d948989b3d743d9405d0ec8c31cf19092d33c
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a13ae0fb662f6207edb63c99af46719be7164af933cafaf35984cf8a7790df00
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:491f92ba9ce65f06837d35b0bf7a55395d7ef01dd1adf1e1cc66473509666736
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edca11951c56aed9e8cdf63fe04accd5c41069d01ff1b594b65f6a3496d35be5
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.609976846979583,
5
  "eval_steps": 100,
6
- "global_step": 6200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -999,6 +999,38 @@
999
  "eval_samples_per_second": 26.013,
1000
  "eval_steps_per_second": 3.252,
1001
  "step": 6200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1002
  }
1003
  ],
1004
  "logging_steps": 100,
@@ -1006,7 +1038,7 @@
1006
  "num_input_tokens_seen": 0,
1007
  "num_train_epochs": 30,
1008
  "save_steps": 100,
1009
- "total_flos": 6.794282177267883e+19,
1010
  "train_batch_size": 8,
1011
  "trial_name": null,
1012
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.6941696484950537,
5
  "eval_steps": 100,
6
+ "global_step": 6400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
999
  "eval_samples_per_second": 26.013,
1000
  "eval_steps_per_second": 3.252,
1001
  "step": 6200
1002
+ },
1003
+ {
1004
+ "epoch": 2.65,
1005
+ "grad_norm": 10.607972145080566,
1006
+ "learning_rate": 4.590742049469965e-05,
1007
+ "loss": 3.6419,
1008
+ "step": 6300
1009
+ },
1010
+ {
1011
+ "epoch": 2.65,
1012
+ "eval_cer": 0.5374105431934614,
1013
+ "eval_loss": 2.2622523307800293,
1014
+ "eval_runtime": 370.8325,
1015
+ "eval_samples_per_second": 25.559,
1016
+ "eval_steps_per_second": 3.196,
1017
+ "step": 6300
1018
+ },
1019
+ {
1020
+ "epoch": 2.69,
1021
+ "grad_norm": 4.678300857543945,
1022
+ "learning_rate": 4.5836749116607775e-05,
1023
+ "loss": 1.7309,
1024
+ "step": 6400
1025
+ },
1026
+ {
1027
+ "epoch": 2.69,
1028
+ "eval_cer": 0.5418393883696375,
1029
+ "eval_loss": 1.8007577657699585,
1030
+ "eval_runtime": 357.8741,
1031
+ "eval_samples_per_second": 26.484,
1032
+ "eval_steps_per_second": 3.311,
1033
+ "step": 6400
1034
  }
1035
  ],
1036
  "logging_steps": 100,
 
1038
  "num_input_tokens_seen": 0,
1039
  "num_train_epochs": 30,
1040
  "save_steps": 100,
1041
+ "total_flos": 7.012483001392289e+19,
1042
  "train_batch_size": 8,
1043
  "trial_name": null,
1044
  "trial_params": null