ashanhr commited on
Commit
f3d41a3
1 Parent(s): 8b0a9da

Training in progress, step 35300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d51827e7f72b6ac13c7717909863137b26f796acdc33d0c423939d31b26e877
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e27caae33c684f3640dae183538f233c632e6b6dd013d1391ffa6812ef5b0f
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba371bbb66479ca88edc50cd8c19bfba12e60e4cf0ec3ad5a6c9f0889746173e
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37818e67057bc9ab28e81eaabfd7516fb101e3ee984df530db2cd877e1ad02b7
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c302482ccc44393e57785ca619efb81e7753d4f5ae50c608e086133a7e9658c
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11dce48e3fc1d772256c604b93835d59113a4d934c93e60ceefb0e7a3d7b4c83
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63135dd8fba75904a387c365b3db0392d738b290969b3cffabe4604b1916dbc0
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcb03d30f7f2fad34efa71219b03cd9586a94705f7898bc5dca76c5295036d2c
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:879222c4151189909f68ebdb26981095b855d468d9fc1fdcd4a5718b314345ac
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ced050c05b9918ceaed2d8641bd55ffcbf9685f204bd1a0efee676b85d41a8
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.775836665965059,
5
  "eval_steps": 100,
6
- "global_step": 35100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5623,6 +5623,38 @@
5623
  "eval_samples_per_second": 25.719,
5624
  "eval_steps_per_second": 3.216,
5625
  "step": 35100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5626
  }
5627
  ],
5628
  "logging_steps": 100,
@@ -5630,7 +5662,7 @@
5630
  "num_input_tokens_seen": 0,
5631
  "num_train_epochs": 30,
5632
  "save_steps": 100,
5633
- "total_flos": 3.8462099464838395e+20,
5634
  "train_batch_size": 8,
5635
  "trial_name": null,
5636
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.86002946748053,
5
  "eval_steps": 100,
6
+ "global_step": 35300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5623
  "eval_samples_per_second": 25.719,
5624
  "eval_steps_per_second": 3.216,
5625
  "step": 35100
5626
+ },
5627
+ {
5628
+ "epoch": 14.82,
5629
+ "grad_norm": 37.1422119140625,
5630
+ "learning_rate": 2.548975265017668e-05,
5631
+ "loss": 0.9169,
5632
+ "step": 35200
5633
+ },
5634
+ {
5635
+ "epoch": 14.82,
5636
+ "eval_cer": 0.3885431739079426,
5637
+ "eval_loss": 2.9854750633239746,
5638
+ "eval_runtime": 393.2259,
5639
+ "eval_samples_per_second": 24.103,
5640
+ "eval_steps_per_second": 3.014,
5641
+ "step": 35200
5642
+ },
5643
+ {
5644
+ "epoch": 14.86,
5645
+ "grad_norm": 2.0946056842803955,
5646
+ "learning_rate": 2.5419081272084804e-05,
5647
+ "loss": 0.9188,
5648
+ "step": 35300
5649
+ },
5650
+ {
5651
+ "epoch": 14.86,
5652
+ "eval_cer": 0.38827187047827616,
5653
+ "eval_loss": 2.4621119499206543,
5654
+ "eval_runtime": 369.124,
5655
+ "eval_samples_per_second": 25.677,
5656
+ "eval_steps_per_second": 3.21,
5657
+ "step": 35300
5658
  }
5659
  ],
5660
  "logging_steps": 100,
 
5662
  "num_input_tokens_seen": 0,
5663
  "num_train_epochs": 30,
5664
  "save_steps": 100,
5665
+ "total_flos": 3.8682651120044894e+20,
5666
  "train_batch_size": 8,
5667
  "trial_name": null,
5668
  "trial_params": null