ashanhr commited on
Commit
108a58e
1 Parent(s): 3daa4f2

Training in progress, step 11500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0801e65f2be99dc1c8c31a8b42fd4c773ebf79cbd409c888bed299500fb67698
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b584231ffb94f091ca5b180feeefb07882a963d45f72c94630e8cac5fca0e3d3
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1758303ea0e620428277da4b68b42acf7e0c018a4d0c417e71df888e1c059b80
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e61f0b12121b7f0c47ad42e38859555c79382d0759ba8c9f5a3f104bde0f8a5
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41a61360fb256b330826da64337ef28050520c40ede2f24419acab91e8992eb7
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357c2098f0867985c60e1e38c32c9f89ff84cf66d05ef0c4edea8dac7da4617c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d18a65e292716178d42dc52ada61554f9c6e571740a31ef9797ed2e10748246
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47182995a8ebb9d76f8a3fc5f5dc83e49f842e0b52d89c8721a4037b63289456
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20def0d25234f52b30d8748f635736a2d8d0a986bdde02ca7864e7641a877f56
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fc2d2274e5132a8354a7158de2274a04f9a3f95d5f286132bba35524da29764
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.756893285624079,
5
  "eval_steps": 100,
6
- "global_step": 11300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1815,6 +1815,38 @@
1815
  "eval_samples_per_second": 26.775,
1816
  "eval_steps_per_second": 3.348,
1817
  "step": 11300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1818
  }
1819
  ],
1820
  "logging_steps": 100,
@@ -1822,7 +1854,7 @@
1822
  "num_input_tokens_seen": 0,
1823
  "num_train_epochs": 30,
1824
  "save_steps": 100,
1825
- "total_flos": 1.2373877715197002e+20,
1826
  "train_batch_size": 8,
1827
  "trial_name": null,
1828
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.84108608713955,
5
  "eval_steps": 100,
6
+ "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1815
  "eval_samples_per_second": 26.775,
1816
  "eval_steps_per_second": 3.348,
1817
  "step": 11300
1818
+ },
1819
+ {
1820
+ "epoch": 4.8,
1821
+ "grad_norm": 1.9226583242416382,
1822
+ "learning_rate": 4.230388692579505e-05,
1823
+ "loss": 1.4556,
1824
+ "step": 11400
1825
+ },
1826
+ {
1827
+ "epoch": 4.8,
1828
+ "eval_cer": 0.4807105705682218,
1829
+ "eval_loss": 1.4841840267181396,
1830
+ "eval_runtime": 376.2491,
1831
+ "eval_samples_per_second": 25.191,
1832
+ "eval_steps_per_second": 3.15,
1833
+ "step": 11400
1834
+ },
1835
+ {
1836
+ "epoch": 4.84,
1837
+ "grad_norm": 6.85697603225708,
1838
+ "learning_rate": 4.223321554770318e-05,
1839
+ "loss": 1.4391,
1840
+ "step": 11500
1841
+ },
1842
+ {
1843
+ "epoch": 4.84,
1844
+ "eval_cer": 0.48008730593250165,
1845
+ "eval_loss": 3.111499547958374,
1846
+ "eval_runtime": 358.9952,
1847
+ "eval_samples_per_second": 26.401,
1848
+ "eval_steps_per_second": 3.301,
1849
+ "step": 11500
1850
  }
1851
  ],
1852
  "logging_steps": 100,
 
1854
  "num_input_tokens_seen": 0,
1855
  "num_train_epochs": 30,
1856
  "save_steps": 100,
1857
+ "total_flos": 1.2594986525589176e+20,
1858
  "train_batch_size": 8,
1859
  "trial_name": null,
1860
  "trial_params": null