ashanhr commited on
Commit
a608ee9
1 Parent(s): adebd9c

Training in progress, step 33100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43cf24c50a2fb90dff7e6f39ca95373b4b3a194b6e7b5ea3450b567d1cf2acd4
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80dede518c72c0bba9c223e7cfed76911fa6394ec26c0650d7e326e1c552fcec
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4045e3043dff6c75ceb1ceaf8651a36ad38e032419f1934e6b35b395ed37e495
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8698a6f801a918beb15989fb2bf3a1edbdc2ca6625d90e61434980880b6dbe95
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:830da9de182f0cddc597083bf4f5e081033ce68dba8352d3fc4bd7b82acf4629
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88f56416695277fa9c9a9d86e755d054a221cbf389be803e09487f20a0bdc4c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66bf72c9582e89b0605330cab03a1ed6574afa16c96e8aa002547f90558e0a41
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7801270dd1e1084f5e8f44290a74a4794810996f2cc1dc68247d892f66491210
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fea4ba0cc40f8b6ca722c0ae8b27293a5270b2e8eb60bc080bc213006fdf44bc
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09f7576dd710ccdafe6cc17194e31546db69ecc2b7cc2efcd8719730dbf27b3b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.849715849294885,
5
  "eval_steps": 100,
6
- "global_step": 32900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5271,6 +5271,38 @@
5271
  "eval_samples_per_second": 25.413,
5272
  "eval_steps_per_second": 3.177,
5273
  "step": 32900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5274
  }
5275
  ],
5276
  "logging_steps": 100,
@@ -5278,7 +5310,7 @@
5278
  "num_input_tokens_seen": 0,
5279
  "num_train_epochs": 30,
5280
  "save_steps": 100,
5281
- "total_flos": 3.6049959871612964e+20,
5282
  "train_batch_size": 8,
5283
  "trial_name": null,
5284
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.933908650810356,
5
  "eval_steps": 100,
6
+ "global_step": 33100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5271
  "eval_samples_per_second": 25.413,
5272
  "eval_steps_per_second": 3.177,
5273
  "step": 32900
5274
+ },
5275
+ {
5276
+ "epoch": 13.89,
5277
+ "grad_norm": 73.3850326538086,
5278
+ "learning_rate": 2.7043816254416965e-05,
5279
+ "loss": 1.0211,
5280
+ "step": 33000
5281
+ },
5282
+ {
5283
+ "epoch": 13.89,
5284
+ "eval_cer": 0.405021802041375,
5285
+ "eval_loss": 2.4291627407073975,
5286
+ "eval_runtime": 390.776,
5287
+ "eval_samples_per_second": 24.254,
5288
+ "eval_steps_per_second": 3.032,
5289
+ "step": 33000
5290
+ },
5291
+ {
5292
+ "epoch": 13.93,
5293
+ "grad_norm": 2.177152395248413,
5294
+ "learning_rate": 2.697314487632509e-05,
5295
+ "loss": 1.2605,
5296
+ "step": 33100
5297
+ },
5298
+ {
5299
+ "epoch": 13.93,
5300
+ "eval_cer": 0.40312512220875213,
5301
+ "eval_loss": 2.450680732727051,
5302
+ "eval_runtime": 369.1148,
5303
+ "eval_samples_per_second": 25.678,
5304
+ "eval_steps_per_second": 3.21,
5305
+ "step": 33100
5306
  }
5307
  ],
5308
  "logging_steps": 100,
 
5310
  "num_input_tokens_seen": 0,
5311
  "num_train_epochs": 30,
5312
  "save_steps": 100,
5313
+ "total_flos": 3.627266703487163e+20,
5314
  "train_batch_size": 8,
5315
  "trial_name": null,
5316
  "trial_params": null