ashanhr committed on
Commit
46493f5
·
verified ·
1 Parent(s): 6c65a72

Training in progress, step 32300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46b7ed34acfee810bf1df0de4de59541603b041a38fbb4dbcde73ee390120e45
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ef20f83611dc5ecda3865d609d1c2529353faeb6e69db3373387d266c57e6d
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14e0b8af6860fa5a3a1dec8a82763be4461db9bc19598a99d13ba72ca5d0b4cd
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cea2f0b6eea82ec7b672c6f99ffe205fc0b7e54f20c87785b2f34e524aa97736
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb216ea78e406e00f2b6177c3e619ca935a753a4d2c9133ddb3865ce6a36e93d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1f8cabf689066796731ca11e18b147c9b88c182baa68362d28e596087a999dd
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:180490a53f200a0ccf80e64da4cf9435033e2460013ac665192a9282d9293db7
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2604f2db63f8d40bcc859d3c5d9db567a5197e74b5fcf8b847396dd21324043f
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d4c398e04ff23b7cf398d4be71ed5c551bcad34152f6745e47db5a460b70aee
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529f0d80ba72a29cd60cd97ed89d7289715ba99737c418335d0e26f49eb2d4a3
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.512944643233004,
5
  "eval_steps": 100,
6
- "global_step": 32100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5143,6 +5143,38 @@
5143
  "eval_samples_per_second": 25.555,
5144
  "eval_steps_per_second": 3.195,
5145
  "step": 32100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5146
  }
5147
  ],
5148
  "logging_steps": 100,
@@ -5150,7 +5182,7 @@
5150
  "num_input_tokens_seen": 0,
5151
  "num_train_epochs": 30,
5152
  "save_steps": 100,
5153
- "total_flos": 3.5171047954888104e+20,
5154
  "train_batch_size": 8,
5155
  "trial_name": null,
5156
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.597137444748475,
5
  "eval_steps": 100,
6
+ "global_step": 32300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5143
  "eval_samples_per_second": 25.555,
5144
  "eval_steps_per_second": 3.195,
5145
  "step": 32100
5146
+ },
5147
+ {
5148
+ "epoch": 13.56,
5149
+ "grad_norm": 4.829713344573975,
5150
+ "learning_rate": 2.7609187279151943e-05,
5151
+ "loss": 1.0437,
5152
+ "step": 32200
5153
+ },
5154
+ {
5155
+ "epoch": 13.56,
5156
+ "eval_cer": 0.4043276563294357,
5157
+ "eval_loss": 1.8479336500167847,
5158
+ "eval_runtime": 398.7479,
5159
+ "eval_samples_per_second": 23.769,
5160
+ "eval_steps_per_second": 2.972,
5161
+ "step": 32200
5162
+ },
5163
+ {
5164
+ "epoch": 13.6,
5165
+ "grad_norm": 3.0680336952209473,
5166
+ "learning_rate": 2.7538515901060075e-05,
5167
+ "loss": 1.4222,
5168
+ "step": 32300
5169
+ },
5170
+ {
5171
+ "epoch": 13.6,
5172
+ "eval_cer": 0.4040881271753158,
5173
+ "eval_loss": 2.828200101852417,
5174
+ "eval_runtime": 367.3731,
5175
+ "eval_samples_per_second": 25.799,
5176
+ "eval_steps_per_second": 3.226,
5177
+ "step": 32300
5178
  }
5179
  ],
5180
  "logging_steps": 100,
 
5182
  "num_input_tokens_seen": 0,
5183
  "num_train_epochs": 30,
5184
  "save_steps": 100,
5185
+ "total_flos": 3.5387141404690835e+20,
5186
  "train_batch_size": 8,
5187
  "trial_name": null,
5188
  "trial_params": null