ashanhr commited on
Commit
75ea635
·
verified ·
1 Parent(s): 327550d

Training in progress, step 32500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02ef20f83611dc5ecda3865d609d1c2529353faeb6e69db3373387d266c57e6d
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09eceed9906bfe27bd3680afd50c41121c73b82c7cd5ef345c69fad35c0ab8b8
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cea2f0b6eea82ec7b672c6f99ffe205fc0b7e54f20c87785b2f34e524aa97736
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bf99370e0c7555ecb290c63576152db41e5b2dbb69d0454ac7e5f89074f7bd4
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1f8cabf689066796731ca11e18b147c9b88c182baa68362d28e596087a999dd
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e617199afd6c3a950fb46336c901b1ff9024ac6556123985ed6b261b689855f
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2604f2db63f8d40bcc859d3c5d9db567a5197e74b5fcf8b847396dd21324043f
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd931d9164958a9409e10cbb2086014b2f7f92c6de1fd6882064a496d5c52211
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:529f0d80ba72a29cd60cd97ed89d7289715ba99737c418335d0e26f49eb2d4a3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2b9126a6c1c8e3c86ea30228e288a1b5e1ab3597232760fc5ccce02139d2524
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.597137444748475,
5
  "eval_steps": 100,
6
- "global_step": 32300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5175,6 +5175,38 @@
5175
  "eval_samples_per_second": 25.799,
5176
  "eval_steps_per_second": 3.226,
5177
  "step": 32300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5178
  }
5179
  ],
5180
  "logging_steps": 100,
@@ -5182,7 +5214,7 @@
5182
  "num_input_tokens_seen": 0,
5183
  "num_train_epochs": 30,
5184
  "save_steps": 100,
5185
- "total_flos": 3.5387141404690835e+20,
5186
  "train_batch_size": 8,
5187
  "trial_name": null,
5188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.681330246263945,
5
  "eval_steps": 100,
6
+ "global_step": 32500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5175
  "eval_samples_per_second": 25.799,
5176
  "eval_steps_per_second": 3.226,
5177
  "step": 32300
5178
+ },
5179
+ {
5180
+ "epoch": 13.64,
5181
+ "grad_norm": 2.740971565246582,
5182
+ "learning_rate": 2.74678445229682e-05,
5183
+ "loss": 1.0443,
5184
+ "step": 32400
5185
+ },
5186
+ {
5187
+ "epoch": 13.64,
5188
+ "eval_cer": 0.422089476359939,
5189
+ "eval_loss": 2.9092776775360107,
5190
+ "eval_runtime": 389.1697,
5191
+ "eval_samples_per_second": 24.354,
5192
+ "eval_steps_per_second": 3.045,
5193
+ "step": 32400
5194
+ },
5195
+ {
5196
+ "epoch": 13.68,
5197
+ "grad_norm": 7.612214088439941,
5198
+ "learning_rate": 2.7397173144876324e-05,
5199
+ "loss": 1.0561,
5200
+ "step": 32500
5201
+ },
5202
+ {
5203
+ "epoch": 13.68,
5204
+ "eval_cer": 0.4040587970748113,
5205
+ "eval_loss": 1.9982529878616333,
5206
+ "eval_runtime": 371.4689,
5207
+ "eval_samples_per_second": 25.515,
5208
+ "eval_steps_per_second": 3.19,
5209
+ "step": 32500
5210
  }
5211
  ],
5212
  "logging_steps": 100,
 
5214
  "num_input_tokens_seen": 0,
5215
  "num_train_epochs": 30,
5216
  "save_steps": 100,
5217
+ "total_flos": 3.560483539431464e+20,
5218
  "train_batch_size": 8,
5219
  "trial_name": null,
5220
  "trial_params": null