ashanhr commited on
Commit
cb781dd
1 Parent(s): 9d1b128

Training in progress, step 27900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a9b68d851de5d3211a5786bc155bb288dd74d58e4dc5493cda8e9bb8f5b66c
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d49c4af7d6780b5bb59189d0dd1867f0b68d29fb3ba703be67e4405ffdd2f7b
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a91c760806ff07f3048741537eb7db0ce9a9fc0545b24f11d5446b75977c0636
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d226082b9d4eb89eb67558845e0d50815390f41da6ece0ea8d6981473441ce
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:006fe2f9891b8bb244bfb635af4ee42dc360b27e8e2fbe6b729ce4508aa080ac
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb4a963bc15dc86cedf79321c880d41e114671433f6bd415c17a07b684173dd
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58fad5d4b2449cf648eabcb3d279346b6662a87e99fb79ac46b0c8815722381f
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd54598a42f926ce0b6c3ce03e26f6e03ec6fcf83016be5d7c0d02f5bedcfc6
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:739f0eb8d8ec6d67ffc73cb7a845f894d3c8ae14810b720e71f07a82de397bb6
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ced60e8e837d5a867fc1635f2587bd5e4112e1921c82905c17a08e817c88f2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.660703009892654,
5
  "eval_steps": 100,
6
- "global_step": 27700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4439,6 +4439,38 @@
4439
  "eval_samples_per_second": 25.78,
4440
  "eval_steps_per_second": 3.223,
4441
  "step": 27700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4442
  }
4443
  ],
4444
  "logging_steps": 100,
@@ -4446,7 +4478,7 @@
4446
  "num_input_tokens_seen": 0,
4447
  "num_train_epochs": 30,
4448
  "save_steps": 100,
4449
- "total_flos": 3.035915998579374e+20,
4450
  "train_batch_size": 8,
4451
  "trial_name": null,
4452
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.744895811408124,
5
  "eval_steps": 100,
6
+ "global_step": 27900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4439
  "eval_samples_per_second": 25.78,
4440
  "eval_steps_per_second": 3.223,
4441
  "step": 27700
4442
+ },
4443
+ {
4444
+ "epoch": 11.7,
4445
+ "grad_norm": 6.817134380340576,
4446
+ "learning_rate": 3.071802120141343e-05,
4447
+ "loss": 1.581,
4448
+ "step": 27800
4449
+ },
4450
+ {
4451
+ "epoch": 11.7,
4452
+ "eval_cer": 0.4181641312424231,
4453
+ "eval_loss": 2.996872901916504,
4454
+ "eval_runtime": 382.8066,
4455
+ "eval_samples_per_second": 24.759,
4456
+ "eval_steps_per_second": 3.096,
4457
+ "step": 27800
4458
+ },
4459
+ {
4460
+ "epoch": 11.74,
4461
+ "grad_norm": 4.087586879730225,
4462
+ "learning_rate": 3.0647349823321555e-05,
4463
+ "loss": 1.1385,
4464
+ "step": 27900
4465
+ },
4466
+ {
4467
+ "epoch": 11.74,
4468
+ "eval_cer": 0.42436255914903603,
4469
+ "eval_loss": 2.688666343688965,
4470
+ "eval_runtime": 375.5428,
4471
+ "eval_samples_per_second": 25.238,
4472
+ "eval_steps_per_second": 3.155,
4473
+ "step": 27900
4474
  }
4475
  ],
4476
  "logging_steps": 100,
 
4478
  "num_input_tokens_seen": 0,
4479
  "num_train_epochs": 30,
4480
  "save_steps": 100,
4481
+ "total_flos": 3.057677998172635e+20,
4482
  "train_batch_size": 8,
4483
  "trial_name": null,
4484
  "trial_params": null