ashanhr commited on
Commit
3d0b028
·
verified ·
1 Parent(s): cf48e5b

Training in progress, step 41400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3f31e821b74dd2aecadf4a49ced761482cb24335fa070340f440dfac62ebd15
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8a24d4d54212b7695bf8a287d4f8eb9270a6d60d13f9a931c5779f13538973f
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff6ee0aff95bbd15a39f1188865d0051383c510dd4594f6c062ec608c68dc92c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c05975af95ab79acf8a310695dfc38749b93829e001cc546ee9d4792a95bad7
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:440e01e057f577e2f701487c44c7f69aaeaa1240bf4c1499312f3532285ec4eb
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe48223f5c90918abbcc4c7259c7f6f8efbe538b78aea4ef8ef1741ac5a2fe8c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68b009d8e28cf1d32b3e47f2da155cb0f031794cbe7f69274b2edda21d846b19
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52d26f522d8cbc1a8729bb0644c4ccc57cbd2a2a52d55e5adac8c2487f5ed38e
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ddc5f7d6b9326460646ca0179f41d1dca732c5948fbb1407ab5e2beb6ca76c2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc876a1c5dec1820929e730793e2c174c8e91d009bdd7e30b3f49cc015882f56
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.259524310671438,
5
  "eval_steps": 100,
6
- "global_step": 41000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6567,6 +6567,70 @@
6567
  "eval_samples_per_second": 25.165,
6568
  "eval_steps_per_second": 3.146,
6569
  "step": 41000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6570
  }
6571
  ],
6572
  "logging_steps": 100,
@@ -6574,7 +6638,7 @@
6574
  "num_input_tokens_seen": 0,
6575
  "num_train_epochs": 30,
6576
  "save_steps": 100,
6577
- "total_flos": 4.49389546173303e+20,
6578
  "train_batch_size": 8,
6579
  "trial_name": null,
6580
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.42790991370238,
5
  "eval_steps": 100,
6
+ "global_step": 41400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6567
  "eval_samples_per_second": 25.165,
6568
  "eval_steps_per_second": 3.146,
6569
  "step": 41000
6570
+ },
6571
+ {
6572
+ "epoch": 17.3,
6573
+ "grad_norm": 5.671442031860352,
6574
+ "learning_rate": 2.1320848056537103e-05,
6575
+ "loss": 0.6544,
6576
+ "step": 41100
6577
+ },
6578
+ {
6579
+ "epoch": 17.3,
6580
+ "eval_cer": 0.3657659066911736,
6581
+ "eval_loss": 1.7072508335113525,
6582
+ "eval_runtime": 397.2129,
6583
+ "eval_samples_per_second": 23.861,
6584
+ "eval_steps_per_second": 2.983,
6585
+ "step": 41100
6586
+ },
6587
+ {
6588
+ "epoch": 17.34,
6589
+ "grad_norm": 1.6116443872451782,
6590
+ "learning_rate": 2.125017667844523e-05,
6591
+ "loss": 0.6304,
6592
+ "step": 41200
6593
+ },
6594
+ {
6595
+ "epoch": 17.34,
6596
+ "eval_cer": 0.36735950881858354,
6597
+ "eval_loss": 2.110800266265869,
6598
+ "eval_runtime": 379.2769,
6599
+ "eval_samples_per_second": 24.99,
6600
+ "eval_steps_per_second": 3.124,
6601
+ "step": 41200
6602
+ },
6603
+ {
6604
+ "epoch": 17.39,
6605
+ "grad_norm": 2.9379818439483643,
6606
+ "learning_rate": 2.117950530035336e-05,
6607
+ "loss": 0.6426,
6608
+ "step": 41300
6609
+ },
6610
+ {
6611
+ "epoch": 17.39,
6612
+ "eval_cer": 0.3644313871182199,
6613
+ "eval_loss": 2.184722900390625,
6614
+ "eval_runtime": 397.6084,
6615
+ "eval_samples_per_second": 23.838,
6616
+ "eval_steps_per_second": 2.98,
6617
+ "step": 41300
6618
+ },
6619
+ {
6620
+ "epoch": 17.43,
6621
+ "grad_norm": 2.8842365741729736,
6622
+ "learning_rate": 2.1108833922261484e-05,
6623
+ "loss": 0.6913,
6624
+ "step": 41400
6625
+ },
6626
+ {
6627
+ "epoch": 17.43,
6628
+ "eval_cer": 0.36425785069023503,
6629
+ "eval_loss": 1.9356818199157715,
6630
+ "eval_runtime": 381.9605,
6631
+ "eval_samples_per_second": 24.814,
6632
+ "eval_steps_per_second": 3.102,
6633
+ "step": 41400
6634
  }
6635
  ],
6636
  "logging_steps": 100,
 
6638
  "num_input_tokens_seen": 0,
6639
  "num_train_epochs": 30,
6640
  "save_steps": 100,
6641
+ "total_flos": 4.5372888758914744e+20,
6642
  "train_batch_size": 8,
6643
  "trial_name": null,
6644
  "trial_params": null