ashanhr commited on
Commit
52c0b23
·
verified ·
1 Parent(s): 6ba7743

Training in progress, step 45700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba72df5975c8b99cbc6d6a6d24dea915763e37bcfeeb094b07a32702d90892df
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcae94e0ba5659a94762126ca017500f9640d109bedc1a86e16f7c3a364e9f5
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9bc26a5cbbbc20d7fd8697e5046c3103c37dc49f7a44a61a5442d759e5639da
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4dca8d70c56075bf2982f5f2b6c6bc3e36a911dffcbaf478a2abecf931e8e68
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:911113640d61c316fc766e86411da7bb59e1d0e3ce7f49e464f18e99ea654517
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:194585cbf7d9cfd62136cd29e1512102cdc3ce800cad797ad714867a01a807e3
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea78b13ed690a2241b4d959d7b6ee1a7ec9311794f20d4d9c39e9088e224fc5e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01cb821d23c120c3f2fb4bfd56f972b2f4b5203cf129d2966782cb58aafc588a
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:388522323e4003fdf0f1aab6ae314ad13e012d10187ec9e801314752b5856b73
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83145c90d97eef0fc9a5597e2a0947d443830911cd43cef4159f379916f7334
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.15386234476952,
5
  "eval_steps": 100,
6
- "global_step": 45500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7287,6 +7287,38 @@
7287
  "eval_samples_per_second": 25.903,
7288
  "eval_steps_per_second": 3.239,
7289
  "step": 45500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7290
  }
7291
  ],
7292
  "logging_steps": 100,
@@ -7294,7 +7326,7 @@
7294
  "num_input_tokens_seen": 0,
7295
  "num_train_epochs": 30,
7296
  "save_steps": 100,
7297
- "total_flos": 4.9862113247753096e+20,
7298
  "train_batch_size": 8,
7299
  "trial_name": null,
7300
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.238055146284992,
5
  "eval_steps": 100,
6
+ "global_step": 45700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7287
  "eval_samples_per_second": 25.903,
7288
  "eval_steps_per_second": 3.239,
7289
  "step": 45500
7290
+ },
7291
+ {
7292
+ "epoch": 19.2,
7293
+ "grad_norm": 5.378861427307129,
7294
+ "learning_rate": 1.8142756183745585e-05,
7295
+ "loss": 0.4807,
7296
+ "step": 45600
7297
+ },
7298
+ {
7299
+ "epoch": 19.2,
7300
+ "eval_cer": 0.35123773024128896,
7301
+ "eval_loss": 2.7749032974243164,
7302
+ "eval_runtime": 380.1844,
7303
+ "eval_samples_per_second": 24.93,
7304
+ "eval_steps_per_second": 3.117,
7305
+ "step": 45600
7306
+ },
7307
+ {
7308
+ "epoch": 19.24,
7309
+ "grad_norm": 1.9357730150222778,
7310
+ "learning_rate": 1.807208480565371e-05,
7311
+ "loss": 0.4868,
7312
+ "step": 45700
7313
+ },
7314
+ {
7315
+ "epoch": 19.24,
7316
+ "eval_cer": 0.35421473544249343,
7317
+ "eval_loss": 2.5809061527252197,
7318
+ "eval_runtime": 368.1076,
7319
+ "eval_samples_per_second": 25.748,
7320
+ "eval_steps_per_second": 3.219,
7321
+ "step": 45700
7322
  }
7323
  ],
7324
  "logging_steps": 100,
 
7326
  "num_input_tokens_seen": 0,
7327
  "num_train_epochs": 30,
7328
  "save_steps": 100,
7329
+ "total_flos": 5.008227787418482e+20,
7330
  "train_batch_size": 8,
7331
  "trial_name": null,
7332
  "trial_params": null