ashanhr commited on
Commit
8053f16
·
verified ·
1 Parent(s): 5ab242a

Training in progress, step 26300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6256e490b0e43c626084d84a341037e37434fd9bcabcbc558b3172b0e909df6
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d05fd16c8588673c1e6ccde78de203e04e081ce27b72ee44b52315e644b740
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8951c5ab590b2778d41e2dc16e2017cb3db0033da9f8d4a91ee266d3a370c3c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5812acb9bb3d64b8d63f94464a796cff46b7ad7f07e8e2c4a1aa5e63c47a7c54
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d909f75ea0266c0aca1a09ff4c06a42792691131283c14fd1a26cdd5f46c7cd8
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b23cd02c502517bc70ad64a5dec054c9efc4df18822ed05bd1e7e119d953775
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6db111608e60b5c73e6c2423d408c2a31f6a8c6486b5343e756b1090ce10da2e
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34fbd3601d535cbde5b18a032775aec5cee5099b7fbfab69d247bf1219d7534a
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78b045b8a4735f098f688aa2d4a47ca9b1dff6138fd3367077decbe825c59f79
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ad90ae3571dbb88467083f26508c8007da34c4d92b38346a376256e4c52e28
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.90296779625342,
5
  "eval_steps": 100,
6
- "global_step": 25900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4151,6 +4151,70 @@
4151
  "eval_samples_per_second": 25.933,
4152
  "eval_steps_per_second": 3.242,
4153
  "step": 25900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4154
  }
4155
  ],
4156
  "logging_steps": 100,
@@ -4158,7 +4222,7 @@
4158
  "num_input_tokens_seen": 0,
4159
  "num_train_epochs": 30,
4160
  "save_steps": 100,
4161
- "total_flos": 2.8377007760000103e+20,
4162
  "train_batch_size": 8,
4163
  "trial_name": null,
4164
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.07135339928436,
5
  "eval_steps": 100,
6
+ "global_step": 26300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4151
  "eval_samples_per_second": 25.933,
4152
  "eval_steps_per_second": 3.242,
4153
  "step": 25900
4154
+ },
4155
+ {
4156
+ "epoch": 10.95,
4157
+ "grad_norm": 3.8992362022399902,
4158
+ "learning_rate": 3.198939929328622e-05,
4159
+ "loss": 1.1808,
4160
+ "step": 26000
4161
+ },
4162
+ {
4163
+ "epoch": 10.95,
4164
+ "eval_cer": 0.419022036682179,
4165
+ "eval_loss": 1.6932332515716553,
4166
+ "eval_runtime": 384.5535,
4167
+ "eval_samples_per_second": 24.647,
4168
+ "eval_steps_per_second": 3.081,
4169
+ "step": 26000
4170
+ },
4171
+ {
4172
+ "epoch": 10.99,
4173
+ "grad_norm": 64.95602416992188,
4174
+ "learning_rate": 3.191872791519435e-05,
4175
+ "loss": 1.2023,
4176
+ "step": 26100
4177
+ },
4178
+ {
4179
+ "epoch": 10.99,
4180
+ "eval_cer": 0.4221408040358218,
4181
+ "eval_loss": 1.5864986181259155,
4182
+ "eval_runtime": 363.5244,
4183
+ "eval_samples_per_second": 26.073,
4184
+ "eval_steps_per_second": 3.26,
4185
+ "step": 26100
4186
+ },
4187
+ {
4188
+ "epoch": 11.03,
4189
+ "grad_norm": 1.6677500009536743,
4190
+ "learning_rate": 3.184805653710248e-05,
4191
+ "loss": 1.1585,
4192
+ "step": 26200
4193
+ },
4194
+ {
4195
+ "epoch": 11.03,
4196
+ "eval_cer": 0.4356106526925032,
4197
+ "eval_loss": 1.4082372188568115,
4198
+ "eval_runtime": 386.3668,
4199
+ "eval_samples_per_second": 24.531,
4200
+ "eval_steps_per_second": 3.067,
4201
+ "step": 26200
4202
+ },
4203
+ {
4204
+ "epoch": 11.07,
4205
+ "grad_norm": 1.4230600595474243,
4206
+ "learning_rate": 3.1777385159010606e-05,
4207
+ "loss": 1.2774,
4208
+ "step": 26300
4209
+ },
4210
+ {
4211
+ "epoch": 11.07,
4212
+ "eval_cer": 0.41568573774979467,
4213
+ "eval_loss": 1.6947673559188843,
4214
+ "eval_runtime": 370.2625,
4215
+ "eval_samples_per_second": 25.598,
4216
+ "eval_steps_per_second": 3.2,
4217
+ "step": 26300
4218
  }
4219
  ],
4220
  "logging_steps": 100,
 
4222
  "num_input_tokens_seen": 0,
4223
  "num_train_epochs": 30,
4224
  "save_steps": 100,
4225
+ "total_flos": 2.8824923424902952e+20,
4226
  "train_batch_size": 8,
4227
  "trial_name": null,
4228
  "trial_params": null