ashanhr committed on
Commit
7dca335
1 Parent(s): 1df6cef

Training in progress, step 45900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dcae94e0ba5659a94762126ca017500f9640d109bedc1a86e16f7c3a364e9f5
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:777938124b8f08a6fc51de91ba4f6a248e0db6ecac962b10d96fb7af7371d947
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4dca8d70c56075bf2982f5f2b6c6bc3e36a911dffcbaf478a2abecf931e8e68
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426caf72cf84a12bb83fee44c5ab19d9a2c05db3c88c152eeba1f77afb9c94e6
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:194585cbf7d9cfd62136cd29e1512102cdc3ce800cad797ad714867a01a807e3
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e181b4508fa0c0aef068c5ce9add7596cdd91480b74f68ef60dd67316c6f7a0c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01cb821d23c120c3f2fb4bfd56f972b2f4b5203cf129d2966782cb58aafc588a
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae2667702793407db19d3728855aeacbfeb7c14b67574cd2f6c8c45fc93cc266
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b83145c90d97eef0fc9a5597e2a0947d443830911cd43cef4159f379916f7334
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bac30970e8a48e7dc94fd4d8f03dc31e7111fa8cfb4c3ecd5d2ab0fbdb55b042
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.238055146284992,
5
  "eval_steps": 100,
6
- "global_step": 45700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7319,6 +7319,38 @@
7319
  "eval_samples_per_second": 25.748,
7320
  "eval_steps_per_second": 3.219,
7321
  "step": 45700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7322
  }
7323
  ],
7324
  "logging_steps": 100,
@@ -7326,7 +7358,7 @@
7326
  "num_input_tokens_seen": 0,
7327
  "num_train_epochs": 30,
7328
  "save_steps": 100,
7329
- "total_flos": 5.008227787418482e+20,
7330
  "train_batch_size": 8,
7331
  "trial_name": null,
7332
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.322247947800463,
5
  "eval_steps": 100,
6
+ "global_step": 45900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7319
  "eval_samples_per_second": 25.748,
7320
  "eval_steps_per_second": 3.219,
7321
  "step": 45700
7322
+ },
7323
+ {
7324
+ "epoch": 19.28,
7325
+ "grad_norm": 2.2689895629882812,
7326
+ "learning_rate": 1.8001413427561838e-05,
7327
+ "loss": 0.471,
7328
+ "step": 45800
7329
+ },
7330
+ {
7331
+ "epoch": 19.28,
7332
+ "eval_cer": 0.3532492863008877,
7333
+ "eval_loss": 2.3049328327178955,
7334
+ "eval_runtime": 377.068,
7335
+ "eval_samples_per_second": 25.136,
7336
+ "eval_steps_per_second": 3.143,
7337
+ "step": 45800
7338
+ },
7339
+ {
7340
+ "epoch": 19.32,
7341
+ "grad_norm": 6.475673198699951,
7342
+ "learning_rate": 1.7930742049469966e-05,
7343
+ "loss": 0.4834,
7344
+ "step": 45900
7345
+ },
7346
+ {
7347
+ "epoch": 19.32,
7348
+ "eval_cer": 0.3522642837589457,
7349
+ "eval_loss": 2.4670591354370117,
7350
+ "eval_runtime": 369.8781,
7351
+ "eval_samples_per_second": 25.625,
7352
+ "eval_steps_per_second": 3.204,
7353
+ "step": 45900
7354
  }
7355
  ],
7356
  "logging_steps": 100,
 
7358
  "num_input_tokens_seen": 0,
7359
  "num_train_epochs": 30,
7360
  "save_steps": 100,
7361
+ "total_flos": 5.030211590848743e+20,
7362
  "train_batch_size": 8,
7363
  "trial_name": null,
7364
  "trial_params": null