ashanhr commited on
Commit
ecb5fba
·
verified ·
1 Parent(s): f872839

Training in progress, step 10700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcf5defa12c4ad41768400f0e89f96d1327b2145e3d75459f574578b69dcb6cc
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f48962b77a0ffadd466c017fcd32bec6946f7ff9c2a2e74009ea84a13a26ac
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc23937287860621ac669594158d01024ad6c4348b6e7634487d58cbff59e02d
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd244e0c641e9fc420b274487550a5fd81f515433393f446ae69f4eb95c66400
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20cdaee32636e4c7671e7b7305bf0afc014c7bf1db11c1dc264cd4d2b45e1530
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5da71c3972cdf1c8ca47cfca858d3f5eebd3ddd0bbfffc533b99224dfc911ff
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52e871decdaedd2ec0c4362d0e1ac2c22c91aeb0a484fed63f8055a64f9a662a
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3e5ea2f8b945220efdb684f11b1780e2edefdb156c009e8c72ef7983c62a8e2
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1593679e1b8de5f2d731b1b6aaef936886d9f364698ba0468455350ed39fffa6
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca3e3c26689da313cb94037cbfc3a7e856b1f0f65c3a0e23766260d8fd9feabe
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.420122079562198,
5
  "eval_steps": 100,
6
- "global_step": 10500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1687,6 +1687,38 @@
1687
  "eval_samples_per_second": 26.52,
1688
  "eval_steps_per_second": 3.316,
1689
  "step": 10500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1690
  }
1691
  ],
1692
  "logging_steps": 100,
@@ -1694,7 +1726,7 @@
1694
  "num_input_tokens_seen": 0,
1695
  "num_train_epochs": 30,
1696
  "save_steps": 100,
1697
- "total_flos": 1.1500317525115727e+20,
1698
  "train_batch_size": 8,
1699
  "trial_name": null,
1700
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.504314881077668,
5
  "eval_steps": 100,
6
+ "global_step": 10700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1687
  "eval_samples_per_second": 26.52,
1688
  "eval_steps_per_second": 3.316,
1689
  "step": 10500
1690
+ },
1691
+ {
1692
+ "epoch": 4.46,
1693
+ "grad_norm": 3.71893048286438,
1694
+ "learning_rate": 4.286925795053004e-05,
1695
+ "loss": 1.4426,
1696
+ "step": 10600
1697
+ },
1698
+ {
1699
+ "epoch": 4.46,
1700
+ "eval_cer": 0.48283211450471236,
1701
+ "eval_loss": 2.5203747749328613,
1702
+ "eval_runtime": 378.5225,
1703
+ "eval_samples_per_second": 25.039,
1704
+ "eval_steps_per_second": 3.131,
1705
+ "step": 10600
1706
+ },
1707
+ {
1708
+ "epoch": 4.5,
1709
+ "grad_norm": 3.5400538444519043,
1710
+ "learning_rate": 4.279858657243816e-05,
1711
+ "loss": 2.5204,
1712
+ "step": 10700
1713
+ },
1714
+ {
1715
+ "epoch": 4.5,
1716
+ "eval_cer": 0.496866567596105,
1717
+ "eval_loss": 3.1510612964630127,
1718
+ "eval_runtime": 360.9029,
1719
+ "eval_samples_per_second": 26.262,
1720
+ "eval_steps_per_second": 3.283,
1721
+ "step": 10700
1722
  }
1723
  ],
1724
  "logging_steps": 100,
 
1726
  "num_input_tokens_seen": 0,
1727
  "num_train_epochs": 30,
1728
  "save_steps": 100,
1729
+ "total_flos": 1.1715979719910262e+20,
1730
  "train_batch_size": 8,
1731
  "trial_name": null,
1732
  "trial_params": null