ashanhr commited on
Commit
9a668f3
1 Parent(s): 2dadc8b

Training in progress, step 55700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3be30c038fe83e2c45066c8865edcd4f97638d7c9497d22507519438f766b9e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:785057a17e70302c21c007e0ef0b0f57a36cfb96718c31c8f395b008e214458d
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc37f0c081dea232ed47687524d26d35478631d3af4618df338fc236e166a184
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19609ff9aeb7b6551ed81c17110b29191a5d741f3df4560f9d420d2d58bce8b9
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5479170340108cb57e7920fc9709a1e6e98ddda8596cfda43bc01b7188a5ccc2
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:293accb5c9dbd84af2034bf699ee6c8b6fc3c17f88796b7449387a733eaff90c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70e547d0244ac6c17f72a2e2c794d7af278410f2b12b34a7dbb487ec9d9fcdcb
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cde67baa4dd7f0279761a9f5a42adbc49a7e57ab6ad2ad6773f0f379f28031b
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a30e334ce630adfe0f015710ad2e5f2148b3a474ea9fa476144c76c38fe45245
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d431db1e058e7753d522d0aa651c6ef43b7fbfb259f3bade675c44750e1cb84
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.363502420543043,
5
  "eval_steps": 100,
6
- "global_step": 55500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8887,6 +8887,38 @@
8887
  "eval_samples_per_second": 23.782,
8888
  "eval_steps_per_second": 2.973,
8889
  "step": 55500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8890
  }
8891
  ],
8892
  "logging_steps": 100,
@@ -8894,7 +8926,7 @@
8894
  "num_input_tokens_seen": 0,
8895
  "num_train_epochs": 30,
8896
  "save_steps": 100,
8897
- "total_flos": 6.081637645988172e+20,
8898
  "train_batch_size": 8,
8899
  "trial_name": null,
8900
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.447695222058513,
5
  "eval_steps": 100,
6
+ "global_step": 55700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8887
  "eval_samples_per_second": 23.782,
8888
  "eval_steps_per_second": 2.973,
8889
  "step": 55500
8890
+ },
8891
+ {
8892
+ "epoch": 23.41,
8893
+ "grad_norm": 2.3033080101013184,
8894
+ "learning_rate": 1.107703180212014e-05,
8895
+ "loss": 0.2574,
8896
+ "step": 55600
8897
+ },
8898
+ {
8899
+ "epoch": 23.41,
8900
+ "eval_cer": 0.3336567791638966,
8901
+ "eval_loss": 2.5886073112487793,
8902
+ "eval_runtime": 422.633,
8903
+ "eval_samples_per_second": 22.426,
8904
+ "eval_steps_per_second": 2.804,
8905
+ "step": 55600
8906
+ },
8907
+ {
8908
+ "epoch": 23.45,
8909
+ "grad_norm": 3.2907676696777344,
8910
+ "learning_rate": 1.100636042402827e-05,
8911
+ "loss": 0.2612,
8912
+ "step": 55700
8913
+ },
8914
+ {
8915
+ "epoch": 23.45,
8916
+ "eval_cer": 0.33410161902154784,
8917
+ "eval_loss": 2.841667413711548,
8918
+ "eval_runtime": 403.4827,
8919
+ "eval_samples_per_second": 23.49,
8920
+ "eval_steps_per_second": 2.937,
8921
+ "step": 55700
8922
  }
8923
  ],
8924
  "logging_steps": 100,
 
8926
  "num_input_tokens_seen": 0,
8927
  "num_train_epochs": 30,
8928
  "save_steps": 100,
8929
+ "total_flos": 6.103646459472622e+20,
8930
  "train_batch_size": 8,
8931
  "trial_name": null,
8932
  "trial_params": null