ashanhr commited on
Commit
4c7cd0d
·
verified ·
1 Parent(s): 7050770

Training in progress, step 10500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0152ae7665e14b7649e1603869eac2627c4b46968a4140b59e840be45c462043
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf5defa12c4ad41768400f0e89f96d1327b2145e3d75459f574578b69dcb6cc
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2ca09b9d6895bf99912a3b620ebf15f2699cd4d85234bae948db7af35d3805b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc23937287860621ac669594158d01024ad6c4348b6e7634487d58cbff59e02d
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64f7c21bc62e1590ba7d33964a61cd28c89e814d09700786d96a59dfd2d2f37a
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20cdaee32636e4c7671e7b7305bf0afc014c7bf1db11c1dc264cd4d2b45e1530
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ac36883a60e8fdddb046d3d099aec879515eeb495fb66554d35da33f561e776
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52e871decdaedd2ec0c4362d0e1ac2c22c91aeb0a484fed63f8055a64f9a662a
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c00379d1a32b555abafe33a1610f228542dd2b56a05a75c6d377908358a81a67
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1593679e1b8de5f2d731b1b6aaef936886d9f364698ba0468455350ed39fffa6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.251736476531256,
5
  "eval_steps": 100,
6
- "global_step": 10100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1623,6 +1623,70 @@
1623
  "eval_samples_per_second": 27.086,
1624
  "eval_steps_per_second": 3.387,
1625
  "step": 10100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1626
  }
1627
  ],
1628
  "logging_steps": 100,
@@ -1630,7 +1694,7 @@
1630
  "num_input_tokens_seen": 0,
1631
  "num_train_epochs": 30,
1632
  "save_steps": 100,
1633
- "total_flos": 1.1062718286020878e+20,
1634
  "train_batch_size": 8,
1635
  "trial_name": null,
1636
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.420122079562198,
5
  "eval_steps": 100,
6
+ "global_step": 10500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1623
  "eval_samples_per_second": 27.086,
1624
  "eval_steps_per_second": 3.387,
1625
  "step": 10100
1626
+ },
1627
+ {
1628
+ "epoch": 4.29,
1629
+ "grad_norm": 3.947378635406494,
1630
+ "learning_rate": 4.315194346289753e-05,
1631
+ "loss": 1.4527,
1632
+ "step": 10200
1633
+ },
1634
+ {
1635
+ "epoch": 4.29,
1636
+ "eval_cer": 0.49508965234054203,
1637
+ "eval_loss": 3.2406046390533447,
1638
+ "eval_runtime": 377.1597,
1639
+ "eval_samples_per_second": 25.13,
1640
+ "eval_steps_per_second": 3.142,
1641
+ "step": 10200
1642
+ },
1643
+ {
1644
+ "epoch": 4.34,
1645
+ "grad_norm": 7.427024841308594,
1646
+ "learning_rate": 4.308127208480566e-05,
1647
+ "loss": 1.4446,
1648
+ "step": 10300
1649
+ },
1650
+ {
1651
+ "epoch": 4.34,
1652
+ "eval_cer": 0.48428639865472606,
1653
+ "eval_loss": 2.7247695922851562,
1654
+ "eval_runtime": 356.4628,
1655
+ "eval_samples_per_second": 26.589,
1656
+ "eval_steps_per_second": 3.324,
1657
+ "step": 10300
1658
+ },
1659
+ {
1660
+ "epoch": 4.38,
1661
+ "grad_norm": 3.1798312664031982,
1662
+ "learning_rate": 4.3010600706713785e-05,
1663
+ "loss": 2.4877,
1664
+ "step": 10400
1665
+ },
1666
+ {
1667
+ "epoch": 4.38,
1668
+ "eval_cer": 0.48304475773336986,
1669
+ "eval_loss": 2.2158772945404053,
1670
+ "eval_runtime": 376.4543,
1671
+ "eval_samples_per_second": 25.177,
1672
+ "eval_steps_per_second": 3.148,
1673
+ "step": 10400
1674
+ },
1675
+ {
1676
+ "epoch": 4.42,
1677
+ "grad_norm": 2.0509414672851562,
1678
+ "learning_rate": 4.293992932862191e-05,
1679
+ "loss": 1.9214,
1680
+ "step": 10500
1681
+ },
1682
+ {
1683
+ "epoch": 4.42,
1684
+ "eval_cer": 0.4835262602166517,
1685
+ "eval_loss": 1.906830906867981,
1686
+ "eval_runtime": 357.384,
1687
+ "eval_samples_per_second": 26.52,
1688
+ "eval_steps_per_second": 3.316,
1689
+ "step": 10500
1690
  }
1691
  ],
1692
  "logging_steps": 100,
 
1694
  "num_input_tokens_seen": 0,
1695
  "num_train_epochs": 30,
1696
  "save_steps": 100,
1697
+ "total_flos": 1.1500317525115727e+20,
1698
  "train_batch_size": 8,
1699
  "trial_name": null,
1700
  "trial_params": null