ashanhr commited on
Commit
d60c6ee
1 Parent(s): 579528c

Training in progress, step 47500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0a7eca189b6cba3d0264a3a6b528f4d768503319d4957c82aafb34cecdfdf4a
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67401216b3029d2622e7e7add3d17e2a39d754ad2562031455d96e2f627ae67b
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d44c9dabcde04ec924b500d0f45e98bbda60772b9108bedac5d65641ce067cb9
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6921906aba6ad6cda84ac2dad9392fde118c107f9c07cce5a4b585abe35d446d
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5815b1015337a35948babb6d5b6ab9cf7c776143678a541486d413ba284536e
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb52b0ddcb7b49c326f60274849ef6aa3c79a378160d3f09440dbe910b761c44
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d54689fcde880091f1c0ee11daaede986eacb0d5ba40ec6c4cb9bad1bdacaa9
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c20e682f3fe40fa57d88e22486ddd5912b58563883c794fa3b1d8da05268377
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d0677dbdd096b9b20e1b9970b686999e123df432ab47b960e13026ac8fbafd2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:372a4198b83fdb5e8fb2ecd27baf4c7bfc16f7ae1e02bdddaa16958cd86125f9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.911597558408754,
5
  "eval_steps": 100,
6
- "global_step": 47300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7575,6 +7575,38 @@
7575
  "eval_samples_per_second": 24.565,
7576
  "eval_steps_per_second": 3.071,
7577
  "step": 47300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7578
  }
7579
  ],
7580
  "logging_steps": 100,
@@ -7582,7 +7614,7 @@
7582
  "num_input_tokens_seen": 0,
7583
  "num_train_epochs": 30,
7584
  "save_steps": 100,
7585
- "total_flos": 5.183308424712062e+20,
7586
  "train_batch_size": 8,
7587
  "trial_name": null,
7588
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.995790359924225,
5
  "eval_steps": 100,
6
+ "global_step": 47500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7575
  "eval_samples_per_second": 24.565,
7576
  "eval_steps_per_second": 3.071,
7577
  "step": 47300
7578
+ },
7579
+ {
7580
+ "epoch": 19.95,
7581
+ "grad_norm": 3.6561086177825928,
7582
+ "learning_rate": 1.6870671378091874e-05,
7583
+ "loss": 0.4914,
7584
+ "step": 47400
7585
+ },
7586
+ {
7587
+ "epoch": 19.95,
7588
+ "eval_cer": 0.3475079191271362,
7589
+ "eval_loss": 2.747345447540283,
7590
+ "eval_runtime": 406.0174,
7591
+ "eval_samples_per_second": 23.344,
7592
+ "eval_steps_per_second": 2.919,
7593
+ "step": 47400
7594
+ },
7595
+ {
7596
+ "epoch": 20.0,
7597
+ "grad_norm": 2.65822696685791,
7598
+ "learning_rate": 1.6800000000000002e-05,
7599
+ "loss": 0.4636,
7600
+ "step": 47500
7601
+ },
7602
+ {
7603
+ "epoch": 20.0,
7604
+ "eval_cer": 0.34717795549646085,
7605
+ "eval_loss": 3.2108983993530273,
7606
+ "eval_runtime": 391.3482,
7607
+ "eval_samples_per_second": 24.219,
7608
+ "eval_steps_per_second": 3.028,
7609
+ "step": 47500
7610
  }
7611
  ],
7612
  "logging_steps": 100,
 
7614
  "num_input_tokens_seen": 0,
7615
  "num_train_epochs": 30,
7616
  "save_steps": 100,
7617
+ "total_flos": 5.20459391384764e+20,
7618
  "train_batch_size": 8,
7619
  "trial_name": null,
7620
  "trial_params": null