ashanhr commited on
Commit
cd53acc
1 Parent(s): 2b5689d

Training in progress, step 28900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9d7a9a80fc16ae19f3ee016e1f87ebbe28f8e5c54d2dee20067ac1375b3a699
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d251d0c72ed5a7685017bde159d15ce6766dc5be3bb7e7e581c05a02a62c04c5
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6412766255f9548b27afe82cfd466bfa83c0ceff5e923f6bd16707cb31228ca1
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b752e7177166bebeb1530b608aac17f572602cfe388058cc6e1fe0d959654324
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb1d84e520f4f8a65b7489c9592eba3324b25ef0380fb698f857e6540f071004
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be5846d525ecdc166b3d6f60d5b1bd254c1dd87056dcca60cac2b041ad05f37b
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54b7c6a6814674e66826f920adadd693d50e8c9564cda211cba81879f559c0af
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:855244fb6d2f7e56e44886d31c5782f973bfa784c99f0334fee935eb6b3be025
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cf7c52a861212ad700d353d281972076097e14210eec92b789d70cbdc6a0277
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e7befc489c14232ad92a4c0e54914d7e1f33b1fa2d6656378fe60d0f3de637
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.997474215954536,
5
  "eval_steps": 100,
6
- "global_step": 28500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4567,6 +4567,70 @@
4567
  "eval_samples_per_second": 25.906,
4568
  "eval_steps_per_second": 3.239,
4569
  "step": 28500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4570
  }
4571
  ],
4572
  "logging_steps": 100,
@@ -4574,7 +4638,7 @@
4574
  "num_input_tokens_seen": 0,
4575
  "num_train_epochs": 30,
4576
  "save_steps": 100,
4577
- "total_flos": 3.122274035580518e+20,
4578
  "train_batch_size": 8,
4579
  "trial_name": null,
4580
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.165859818985476,
5
  "eval_steps": 100,
6
+ "global_step": 28900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4567
  "eval_samples_per_second": 25.906,
4568
  "eval_steps_per_second": 3.239,
4569
  "step": 28500
4570
+ },
4571
+ {
4572
+ "epoch": 12.04,
4573
+ "grad_norm": 7.630153656005859,
4574
+ "learning_rate": 3.015265017667845e-05,
4575
+ "loss": 1.0524,
4576
+ "step": 28600
4577
+ },
4578
+ {
4579
+ "epoch": 12.04,
4580
+ "eval_cer": 0.4113008877243753,
4581
+ "eval_loss": 1.9460973739624023,
4582
+ "eval_runtime": 389.3684,
4583
+ "eval_samples_per_second": 24.342,
4584
+ "eval_steps_per_second": 3.043,
4585
+ "step": 28600
4586
+ },
4587
+ {
4588
+ "epoch": 12.08,
4589
+ "grad_norm": 16.8402099609375,
4590
+ "learning_rate": 3.0081978798586573e-05,
4591
+ "loss": 1.073,
4592
+ "step": 28700
4593
+ },
4594
+ {
4595
+ "epoch": 12.08,
4596
+ "eval_cer": 0.4151773493410504,
4597
+ "eval_loss": 2.0268595218658447,
4598
+ "eval_runtime": 367.5014,
4599
+ "eval_samples_per_second": 25.79,
4600
+ "eval_steps_per_second": 3.224,
4601
+ "step": 28700
4602
+ },
4603
+ {
4604
+ "epoch": 12.12,
4605
+ "grad_norm": 1.9278266429901123,
4606
+ "learning_rate": 3.0011307420494698e-05,
4607
+ "loss": 1.0857,
4608
+ "step": 28800
4609
+ },
4610
+ {
4611
+ "epoch": 12.12,
4612
+ "eval_cer": 0.4125180868953111,
4613
+ "eval_loss": 1.7026644945144653,
4614
+ "eval_runtime": 390.3027,
4615
+ "eval_samples_per_second": 24.284,
4616
+ "eval_steps_per_second": 3.036,
4617
+ "step": 28800
4618
+ },
4619
+ {
4620
+ "epoch": 12.17,
4621
+ "grad_norm": 1.4629472494125366,
4622
+ "learning_rate": 2.994063604240283e-05,
4623
+ "loss": 1.1243,
4624
+ "step": 28900
4625
+ },
4626
+ {
4627
+ "epoch": 12.17,
4628
+ "eval_cer": 0.411283778499081,
4629
+ "eval_loss": 2.061549663543701,
4630
+ "eval_runtime": 372.4148,
4631
+ "eval_samples_per_second": 25.45,
4632
+ "eval_steps_per_second": 3.182,
4633
+ "step": 28900
4634
  }
4635
  ],
4636
  "logging_steps": 100,
 
4638
  "num_input_tokens_seen": 0,
4639
  "num_train_epochs": 30,
4640
  "save_steps": 100,
4641
+ "total_flos": 3.16704104262254e+20,
4642
  "train_batch_size": 8,
4643
  "trial_name": null,
4644
  "trial_params": null