ashanhr commited on
Commit
8cb264f
1 Parent(s): 861856e

Training in progress, step 28500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43f58101992470ce99c1c24e2ed6ac1048281fa6539f6045c9dff060f69941d5
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9d7a9a80fc16ae19f3ee016e1f87ebbe28f8e5c54d2dee20067ac1375b3a699
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6704070a643620f09ad45b4926d920d7c7503d06723489d09b8e5566b2037939
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6412766255f9548b27afe82cfd466bfa83c0ceff5e923f6bd16707cb31228ca1
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4548ca236f4173864dbd0ca1a1498544aaf94a4e29a4240dab5c4e4067bc8c43
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb1d84e520f4f8a65b7489c9592eba3324b25ef0380fb698f857e6540f071004
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01741ba2843fcc38342b1b30b163edc00ac6120a04533362fb33d9ecac4ce1fe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54b7c6a6814674e66826f920adadd693d50e8c9564cda211cba81879f559c0af
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4f04843d80970cd259c83429daf5f235586900bc4e8d66f6edb21ed7d2feb79
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf7c52a861212ad700d353d281972076097e14210eec92b789d70cbdc6a0277
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.913281414439066,
5
  "eval_steps": 100,
6
- "global_step": 28300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4535,6 +4535,38 @@
4535
  "eval_samples_per_second": 25.816,
4536
  "eval_steps_per_second": 3.228,
4537
  "step": 28300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4538
  }
4539
  ],
4540
  "logging_steps": 100,
@@ -4542,7 +4574,7 @@
4542
  "num_input_tokens_seen": 0,
4543
  "num_train_epochs": 30,
4544
  "save_steps": 100,
4545
- "total_flos": 3.101048540517328e+20,
4546
  "train_batch_size": 8,
4547
  "trial_name": null,
4548
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.997474215954536,
5
  "eval_steps": 100,
6
+ "global_step": 28500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4535
  "eval_samples_per_second": 25.816,
4536
  "eval_steps_per_second": 3.228,
4537
  "step": 28300
4538
+ },
4539
+ {
4540
+ "epoch": 11.96,
4541
+ "grad_norm": 13.059289932250977,
4542
+ "learning_rate": 3.0293992932862192e-05,
4543
+ "loss": 1.1318,
4544
+ "step": 28400
4545
+ },
4546
+ {
4547
+ "epoch": 11.96,
4548
+ "eval_cer": 0.4224341050408666,
4549
+ "eval_loss": 2.1082444190979004,
4550
+ "eval_runtime": 393.3473,
4551
+ "eval_samples_per_second": 24.096,
4552
+ "eval_steps_per_second": 3.013,
4553
+ "step": 28400
4554
+ },
4555
+ {
4556
+ "epoch": 12.0,
4557
+ "grad_norm": 5.903038501739502,
4558
+ "learning_rate": 3.0223321554770317e-05,
4559
+ "loss": 1.1131,
4560
+ "step": 28500
4561
+ },
4562
+ {
4563
+ "epoch": 12.0,
4564
+ "eval_cer": 0.41807125259082556,
4565
+ "eval_loss": 1.7783021926879883,
4566
+ "eval_runtime": 365.8673,
4567
+ "eval_samples_per_second": 25.906,
4568
+ "eval_steps_per_second": 3.239,
4569
+ "step": 28500
4570
  }
4571
  ],
4572
  "logging_steps": 100,
 
4574
  "num_input_tokens_seen": 0,
4575
  "num_train_epochs": 30,
4576
  "save_steps": 100,
4577
+ "total_flos": 3.122274035580518e+20,
4578
  "train_batch_size": 8,
4579
  "trial_name": null,
4580
  "trial_params": null