ashanhr commited on
Commit
c810b37
1 Parent(s): 253b354

Training in progress, step 41000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f6409a44328ccff19c64caff488439da9273da3c101449fa3123336a4468ea
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f31e821b74dd2aecadf4a49ced761482cb24335fa070340f440dfac62ebd15
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4dcbe4d068720daaaee2a8da1dbc2bbe9c056e9518e3461c7459f79006631080
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6ee0aff95bbd15a39f1188865d0051383c510dd4594f6c062ec608c68dc92c
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33d5708694aaf8845bf750d7154f0271164aa685b76c098f6dd40029eeb5662f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440e01e057f577e2f701487c44c7f69aaeaa1240bf4c1499312f3532285ec4eb
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72f3d277caa97e791c619eba8f2b402ab01caedcd3e1225af2b74b9180ced546
3
- size 14631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b009d8e28cf1d32b3e47f2da155cb0f031794cbe7f69274b2edda21d846b19
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34c4ef4e4f4492564f8cc89617cc7f93e8c2f6998eb6eef00126c81220e404a6
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddc5f7d6b9326460646ca0179f41d1dca732c5948fbb1407ab5e2beb6ca76c2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.175331509155967,
5
  "eval_steps": 100,
6
- "global_step": 40800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6535,6 +6535,38 @@
6535
  "eval_samples_per_second": 24.92,
6536
  "eval_steps_per_second": 3.116,
6537
  "step": 40800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6538
  }
6539
  ],
6540
  "logging_steps": 100,
@@ -6542,7 +6574,7 @@
6542
  "num_input_tokens_seen": 0,
6543
  "num_train_epochs": 30,
6544
  "save_steps": 100,
6545
- "total_flos": 4.4722373414464927e+20,
6546
  "train_batch_size": 8,
6547
  "trial_name": null,
6548
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.259524310671438,
5
  "eval_steps": 100,
6
+ "global_step": 41000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6535
  "eval_samples_per_second": 24.92,
6536
  "eval_steps_per_second": 3.116,
6537
  "step": 40800
6538
+ },
6539
+ {
6540
+ "epoch": 17.22,
6541
+ "grad_norm": 7.374648094177246,
6542
+ "learning_rate": 2.146219081272085e-05,
6543
+ "loss": 0.6245,
6544
+ "step": 40900
6545
+ },
6546
+ {
6547
+ "epoch": 17.22,
6548
+ "eval_cer": 0.36244916115912557,
6549
+ "eval_loss": 2.089902400970459,
6550
+ "eval_runtime": 399.0568,
6551
+ "eval_samples_per_second": 23.751,
6552
+ "eval_steps_per_second": 2.97,
6553
+ "step": 40900
6554
+ },
6555
+ {
6556
+ "epoch": 17.26,
6557
+ "grad_norm": 27.956933975219727,
6558
+ "learning_rate": 2.1391519434628975e-05,
6559
+ "loss": 0.6492,
6560
+ "step": 41000
6561
+ },
6562
+ {
6563
+ "epoch": 17.26,
6564
+ "eval_cer": 0.36416741621367954,
6565
+ "eval_loss": 2.015270948410034,
6566
+ "eval_runtime": 376.6268,
6567
+ "eval_samples_per_second": 25.165,
6568
+ "eval_steps_per_second": 3.146,
6569
+ "step": 41000
6570
  }
6571
  ],
6572
  "logging_steps": 100,
 
6574
  "num_input_tokens_seen": 0,
6575
  "num_train_epochs": 30,
6576
  "save_steps": 100,
6577
+ "total_flos": 4.49389546173303e+20,
6578
  "train_batch_size": 8,
6579
  "trial_name": null,
6580
  "trial_params": null