ashanhr commited on
Commit
72027f3
1 Parent(s): d19cfc5

Training in progress, step 35500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5e27caae33c684f3640dae183538f233c632e6b6dd013d1391ffa6812ef5b0f
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56ff4705ab488bb59741b0e5c71e9ac05b7326124a2bfb5d7f395e778b9050e3
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37818e67057bc9ab28e81eaabfd7516fb101e3ee984df530db2cd877e1ad02b7
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bebf964f27f1886af6b09595cbe151e56d1dfb73585f5c75ef9fe367ce5cb5c
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11dce48e3fc1d772256c604b93835d59113a4d934c93e60ceefb0e7a3d7b4c83
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d29c51ee6b98cdc02945191da5c1b40cb5666775063ea99d3215e1a3f3026d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcb03d30f7f2fad34efa71219b03cd9586a94705f7898bc5dca76c5295036d2c
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7d96028437c689f30c391fd7b819c0717a3f42c10fd6e760f3a8d7f10e65590
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44ced050c05b9918ceaed2d8641bd55ffcbf9685f204bd1a0efee676b85d41a8
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a895192cf0f0de5099223427191af438af44d9d236e07991f988cd0f9fc2af
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.86002946748053,
5
  "eval_steps": 100,
6
- "global_step": 35300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5655,6 +5655,38 @@
5655
  "eval_samples_per_second": 25.677,
5656
  "eval_steps_per_second": 3.21,
5657
  "step": 35300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5658
  }
5659
  ],
5660
  "logging_steps": 100,
@@ -5662,7 +5694,7 @@
5662
  "num_input_tokens_seen": 0,
5663
  "num_train_epochs": 30,
5664
  "save_steps": 100,
5665
- "total_flos": 3.8682651120044894e+20,
5666
  "train_batch_size": 8,
5667
  "trial_name": null,
5668
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.944222268996,
5
  "eval_steps": 100,
6
+ "global_step": 35500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5655
  "eval_samples_per_second": 25.677,
5656
  "eval_steps_per_second": 3.21,
5657
  "step": 35300
5658
+ },
5659
+ {
5660
+ "epoch": 14.9,
5661
+ "grad_norm": 2.0357742309570312,
5662
+ "learning_rate": 2.5348409893992936e-05,
5663
+ "loss": 0.9183,
5664
+ "step": 35400
5665
+ },
5666
+ {
5667
+ "epoch": 14.9,
5668
+ "eval_cer": 0.38374036995033434,
5669
+ "eval_loss": 1.656967043876648,
5670
+ "eval_runtime": 397.837,
5671
+ "eval_samples_per_second": 23.824,
5672
+ "eval_steps_per_second": 2.979,
5673
+ "step": 35400
5674
+ },
5675
+ {
5676
+ "epoch": 14.94,
5677
+ "grad_norm": 2.4356577396392822,
5678
+ "learning_rate": 2.527773851590106e-05,
5679
+ "loss": 0.9208,
5680
+ "step": 35500
5681
+ },
5682
+ {
5683
+ "epoch": 14.94,
5684
+ "eval_cer": 0.3846764889914356,
5685
+ "eval_loss": 1.6596330404281616,
5686
+ "eval_runtime": 368.5045,
5687
+ "eval_samples_per_second": 25.72,
5688
+ "eval_steps_per_second": 3.216,
5689
+ "step": 35500
5690
  }
5691
  ],
5692
  "logging_steps": 100,
 
5694
  "num_input_tokens_seen": 0,
5695
  "num_train_epochs": 30,
5696
  "save_steps": 100,
5697
+ "total_flos": 3.89001558499963e+20,
5698
  "train_batch_size": 8,
5699
  "trial_name": null,
5700
  "trial_params": null