ashanhr commited on
Commit
5b1182a
1 Parent(s): dfe4fdd

Training in progress, step 23300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5de2a0b6f5605c28e3b76e90893140030beb717b3d1a72803f0f6f8bf22f20d3
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:885e4086287d81a6c57b433d7482c8a891e84519bd6a20d1ec115d03c74c0ce0
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fdeca2e925e60f963cb2238d682deedab54a56146007d4419812a3113ddc204
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5486ea2d5d852edcc9c66f324045c258e41e901f713ba2047c61095d581b32e9
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d10bfe6adc048388b735955f524b88e59f21f6f4b5176cd8a3a681534d740d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec688ceaf93ae471f57a8056ca6dbf8c0b36abdc291ae512e6cb0dbe74e4638d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:024df13920a7bfc09c9ab0f4046389f896972fb4c1a844e4c41f17872fd80aa2
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95ac58f573cfcf6a4b348ea728bffa80f65499c1f4c3ba4d255445fd485d15b
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a23d9aa0c3e1803c2e7edca514f5bf1ca5e6413b4e99acf0ac462c28ded1f82
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:889ad38937ee9ef0869ccade470844b4b0a09f60a10384d27c3be448b16e4af6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.640075773521364,
5
  "eval_steps": 100,
6
- "global_step": 22900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3671,6 +3671,70 @@
3671
  "eval_samples_per_second": 26.11,
3672
  "eval_steps_per_second": 3.264,
3673
  "step": 22900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3674
  }
3675
  ],
3676
  "logging_steps": 100,
@@ -3678,7 +3742,7 @@
3678
  "num_input_tokens_seen": 0,
3679
  "num_train_epochs": 30,
3680
  "save_steps": 100,
3681
- "total_flos": 2.5097235719710438e+20,
3682
  "train_batch_size": 8,
3683
  "trial_name": null,
3684
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.808461376552305,
5
  "eval_steps": 100,
6
+ "global_step": 23300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3671
  "eval_samples_per_second": 26.11,
3672
  "eval_steps_per_second": 3.264,
3673
  "step": 22900
3674
+ },
3675
+ {
3676
+ "epoch": 9.68,
3677
+ "grad_norm": 8.905746459960938,
3678
+ "learning_rate": 3.4108833922261485e-05,
3679
+ "loss": 1.2559,
3680
+ "step": 23000
3681
+ },
3682
+ {
3683
+ "epoch": 9.68,
3684
+ "eval_cer": 0.452297035704509,
3685
+ "eval_loss": 1.6737189292907715,
3686
+ "eval_runtime": 383.7761,
3687
+ "eval_samples_per_second": 24.697,
3688
+ "eval_steps_per_second": 3.088,
3689
+ "step": 23000
3690
+ },
3691
+ {
3692
+ "epoch": 9.72,
3693
+ "grad_norm": 3.8121345043182373,
3694
+ "learning_rate": 3.4038162544169613e-05,
3695
+ "loss": 1.2646,
3696
+ "step": 23100
3697
+ },
3698
+ {
3699
+ "epoch": 9.72,
3700
+ "eval_cer": 0.45129247976223064,
3701
+ "eval_loss": 1.4810179471969604,
3702
+ "eval_runtime": 361.7355,
3703
+ "eval_samples_per_second": 26.201,
3704
+ "eval_steps_per_second": 3.276,
3705
+ "step": 23100
3706
+ },
3707
+ {
3708
+ "epoch": 9.77,
3709
+ "grad_norm": 1.6551660299301147,
3710
+ "learning_rate": 3.396749116607774e-05,
3711
+ "loss": 1.2812,
3712
+ "step": 23200
3713
+ },
3714
+ {
3715
+ "epoch": 9.77,
3716
+ "eval_cer": 0.4401079347698565,
3717
+ "eval_loss": 1.4080007076263428,
3718
+ "eval_runtime": 387.5504,
3719
+ "eval_samples_per_second": 24.456,
3720
+ "eval_steps_per_second": 3.058,
3721
+ "step": 23200
3722
+ },
3723
+ {
3724
+ "epoch": 9.81,
3725
+ "grad_norm": 1.3945401906967163,
3726
+ "learning_rate": 3.389681978798587e-05,
3727
+ "loss": 1.2539,
3728
+ "step": 23300
3729
+ },
3730
+ {
3731
+ "epoch": 9.81,
3732
+ "eval_cer": 0.44230280395760824,
3733
+ "eval_loss": 1.8802071809768677,
3734
+ "eval_runtime": 376.3421,
3735
+ "eval_samples_per_second": 25.185,
3736
+ "eval_steps_per_second": 3.149,
3737
+ "step": 23300
3738
  }
3739
  ],
3740
  "logging_steps": 100,
 
3742
  "num_input_tokens_seen": 0,
3743
  "num_train_epochs": 30,
3744
  "save_steps": 100,
3745
+ "total_flos": 2.5535582493052453e+20,
3746
  "train_batch_size": 8,
3747
  "trial_name": null,
3748
  "trial_params": null