ashanhr committed
Commit ae5133c · verified · 1 Parent(s): f18ace6

Training in progress, step 12100, checkpoint

last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b584231ffb94f091ca5b180feeefb07882a963d45f72c94630e8cac5fca0e3d3
+oid sha256:990f79ce5d5fce5b54c543410da49311c8727e5393eaf8de5beb75ddea62f025
 size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e61f0b12121b7f0c47ad42e38859555c79382d0759ba8c9f5a3f104bde0f8a5
+oid sha256:348350ce5be165c3b33d0652bff5953348b5181192242ec398df2e8b058bc2bb
 size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:357c2098f0867985c60e1e38c32c9f89ff84cf66d05ef0c4edea8dac7da4617c
+oid sha256:7fda19efc8188b89a824ef6b745bab7a4b2df0fcc62fc3ee12571612ab5443e8
 size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47182995a8ebb9d76f8a3fc5f5dc83e49f842e0b52d89c8721a4037b63289456
-size 14503
+oid sha256:ac3b1bd46b3911f03359a3982a0c03f865d3787800599fe7d28e536bbc352b08
+size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fc2d2274e5132a8354a7158de2274a04f9a3f95d5f286132bba35524da29764
+oid sha256:4fca80c682586ea565475c8cb2e3f5097ebcafda0408dbe21093035fc5d9ba92
 size 623
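
Note: each `CHANGED` block above is a Git LFS pointer update, so only the `oid sha256:` and `size` fields change when a new checkpoint overwrites the file. If you want to confirm that locally downloaded checkpoint files match the pointers in this commit, a minimal sketch is shown below (the `last-checkpoint/` path is an assumption about where the files were downloaded; the digests and sizes are copied from the pointers above):

```python
import hashlib
from pathlib import Path

# Hypothetical local directory holding the downloaded checkpoint files.
CHECKPOINT_DIR = Path("last-checkpoint")

# Expected (oid, size) pairs taken from the LFS pointers in this commit.
EXPECTED = {
    "model-00001-of-00002.safetensors": (
        "990f79ce5d5fce5b54c543410da49311c8727e5393eaf8de5beb75ddea62f025",
        4978139416,
    ),
    "model-00002-of-00002.safetensors": (
        "348350ce5be165c3b33d0652bff5953348b5181192242ec398df2e8b058bc2bb",
        3659223436,
    ),
}

def verify(path: Path, oid: str, size: int) -> bool:
    """Check that a file matches its LFS pointer: byte size and sha256 oid."""
    if path.stat().st_size != size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid

for name, (oid, size) in EXPECTED.items():
    status = "OK" if verify(CHECKPOINT_DIR / name, oid, size) else "MISMATCH"
    print(name, status)
```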
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.84108608713955,
+  "epoch": 5.093664491685961,
   "eval_steps": 100,
-  "global_step": 11500,
+  "global_step": 12100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1847,6 +1847,102 @@
       "eval_samples_per_second": 26.401,
       "eval_steps_per_second": 3.301,
       "step": 11500
+    },
+    {
+      "epoch": 4.88,
+      "grad_norm": 12.522397994995117,
+      "learning_rate": 4.216254416961131e-05,
+      "loss": 1.4244,
+      "step": 11600
+    },
+    {
+      "epoch": 4.88,
+      "eval_cer": 0.48068612881780143,
+      "eval_loss": 2.598745822906494,
+      "eval_runtime": 385.9814,
+      "eval_samples_per_second": 24.556,
+      "eval_steps_per_second": 3.07,
+      "step": 11600
+    },
+    {
+      "epoch": 4.93,
+      "grad_norm": 3.1015026569366455,
+      "learning_rate": 4.209187279151944e-05,
+      "loss": 3.7378,
+      "step": 11700
+    },
+    {
+      "epoch": 4.93,
+      "eval_cer": 0.47458791208791207,
+      "eval_loss": 2.3908824920654297,
+      "eval_runtime": 373.2148,
+      "eval_samples_per_second": 25.396,
+      "eval_steps_per_second": 3.175,
+      "step": 11700
+    },
+    {
+      "epoch": 4.97,
+      "grad_norm": 86.87032318115234,
+      "learning_rate": 4.2021201413427565e-05,
+      "loss": 2.8329,
+      "step": 11800
+    },
+    {
+      "epoch": 4.97,
+      "eval_cer": 0.4754898126784248,
+      "eval_loss": 2.441450357437134,
+      "eval_runtime": 446.0173,
+      "eval_samples_per_second": 21.25,
+      "eval_steps_per_second": 2.657,
+      "step": 11800
+    },
+    {
+      "epoch": 5.01,
+      "grad_norm": 2.7503468990325928,
+      "learning_rate": 4.195053003533569e-05,
+      "loss": 2.4912,
+      "step": 11900
+    },
+    {
+      "epoch": 5.01,
+      "eval_cer": 0.488764127331743,
+      "eval_loss": 1.6247801780700684,
+      "eval_runtime": 361.3079,
+      "eval_samples_per_second": 26.232,
+      "eval_steps_per_second": 3.28,
+      "step": 11900
+    },
+    {
+      "epoch": 5.05,
+      "grad_norm": 2.511701822280884,
+      "learning_rate": 4.187985865724382e-05,
+      "loss": 2.009,
+      "step": 12000
+    },
+    {
+      "epoch": 5.05,
+      "eval_cer": 0.46521938915177347,
+      "eval_loss": 1.8090691566467285,
+      "eval_runtime": 401.8599,
+      "eval_samples_per_second": 23.585,
+      "eval_steps_per_second": 2.949,
+      "step": 12000
+    },
+    {
+      "epoch": 5.09,
+      "grad_norm": 4.231322765350342,
+      "learning_rate": 4.180918727915194e-05,
+      "loss": 1.6484,
+      "step": 12100
+    },
+    {
+      "epoch": 5.09,
+      "eval_cer": 0.483240291736733,
+      "eval_loss": 1.89494788646698,
+      "eval_runtime": 367.7673,
+      "eval_samples_per_second": 25.772,
+      "eval_steps_per_second": 3.222,
+      "step": 12100
     }
   ],
   "logging_steps": 100,
@@ -1854,7 +1950,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 30,
   "save_steps": 100,
-  "total_flos": 1.2594986525589176e+20,
+  "total_flos": 1.3265676741232484e+20,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null