ashanhr commited on
Commit
2721d9c
1 Parent(s): 4cc37f6

Training in progress, step 22900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f673b59dec39b6208eaf91ca45400990b1ab1106cb32adb89e40289b3692b7e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de2a0b6f5605c28e3b76e90893140030beb717b3d1a72803f0f6f8bf22f20d3
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19267df07b64ec62c5070fedddb73269d34ceb079fafa1cc02aa086792ab3300
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fdeca2e925e60f963cb2238d682deedab54a56146007d4419812a3113ddc204
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc7974ef7e39b635024ed4786978103eb9d236e553fc1a70a6679023daa9296c
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d10bfe6adc048388b735955f524b88e59f21f6f4b5176cd8a3a681534d740d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8052a894386dd37f759deaab2c0d19acd6e44b9e9130c47c71e3f213d13e6053
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:024df13920a7bfc09c9ab0f4046389f896972fb4c1a844e4c41f17872fd80aa2
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d49d336b26493b459dab233dfc32ed3bf79ed6ce889dcb67006a60f916875b97
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a23d9aa0c3e1803c2e7edca514f5bf1ca5e6413b4e99acf0ac462c28ded1f82
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.555882972005893,
5
  "eval_steps": 100,
6
- "global_step": 22700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3639,6 +3639,38 @@
3639
  "eval_samples_per_second": 26.255,
3640
  "eval_steps_per_second": 3.283,
3641
  "step": 22700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3642
  }
3643
  ],
3644
  "logging_steps": 100,
@@ -3646,7 +3678,7 @@
3646
  "num_input_tokens_seen": 0,
3647
  "num_train_epochs": 30,
3648
  "save_steps": 100,
3649
- "total_flos": 2.487348846779533e+20,
3650
  "train_batch_size": 8,
3651
  "trial_name": null,
3652
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.640075773521364,
5
  "eval_steps": 100,
6
+ "global_step": 22900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3639
  "eval_samples_per_second": 26.255,
3640
  "eval_steps_per_second": 3.283,
3641
  "step": 22700
3642
+ },
3643
+ {
3644
+ "epoch": 9.6,
3645
+ "grad_norm": 4.784325122833252,
3646
+ "learning_rate": 3.4250176678445236e-05,
3647
+ "loss": 1.2519,
3648
+ "step": 22800
3649
+ },
3650
+ {
3651
+ "epoch": 9.6,
3652
+ "eval_cer": 0.4394284541081694,
3653
+ "eval_loss": 2.026627540588379,
3654
+ "eval_runtime": 383.883,
3655
+ "eval_samples_per_second": 24.69,
3656
+ "eval_steps_per_second": 3.087,
3657
+ "step": 22800
3658
+ },
3659
+ {
3660
+ "epoch": 9.64,
3661
+ "grad_norm": 34.55485534667969,
3662
+ "learning_rate": 3.417950530035336e-05,
3663
+ "loss": 1.2678,
3664
+ "step": 22900
3665
+ },
3666
+ {
3667
+ "epoch": 9.64,
3668
+ "eval_cer": 0.45279075906300104,
3669
+ "eval_loss": 1.6792824268341064,
3670
+ "eval_runtime": 362.9993,
3671
+ "eval_samples_per_second": 26.11,
3672
+ "eval_steps_per_second": 3.264,
3673
+ "step": 22900
3674
  }
3675
  ],
3676
  "logging_steps": 100,
 
3678
  "num_input_tokens_seen": 0,
3679
  "num_train_epochs": 30,
3680
  "save_steps": 100,
3681
+ "total_flos": 2.5097235719710438e+20,
3682
  "train_batch_size": 8,
3683
  "trial_name": null,
3684
  "trial_params": null