plip committed on
Commit
d83e249
1 Parent(s): 36a87dc

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:150d8b3c57728037c4a472baaa9d8c4cdc3e29d155183e7b2a12684f9319c8cc
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a8740e850a7025efc4240518d5b20a0266625ee7a4df5824989ffd90a771e4
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac7a8dbd06b2428a64e1f1bb1794f8399c9be17660f04561ced3ff300c0d49c2
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9743dfff2c56ecc42fa49782ae324bef5653923fd6c7bcd04473781c4c5cb59f
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711bcfdcb6dd8b515af0d3d840d09eef233ce6cd3e8e3e4c426ccd84e88d11e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5810d5d1337eca8d561357c6c9e9920258e5fc2b2f0f70ea4b52e4984949eec8
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7786e0d240c1817a80f936fe537093f6b0f81238abcccea2c0e618f1ac9e9438
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.096973368438221,
5
- "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1806,11 +1806,211 @@
1806
  "eval_samples_per_second": 1570.392,
1807
  "eval_steps_per_second": 25.006,
1808
  "step": 90000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1809
  }
1810
  ],
1811
  "max_steps": 500000,
1812
  "num_train_epochs": 12,
1813
- "total_flos": 2.8753792549886865e+21,
1814
  "trial_name": null,
1815
  "trial_params": null
1816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.329970409375801,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1806
  "eval_samples_per_second": 1570.392,
1807
  "eval_steps_per_second": 25.006,
1808
  "step": 90000
1809
+ },
1810
+ {
1811
+ "epoch": 2.11,
1812
+ "learning_rate": 0.00028660537773622294,
1813
+ "loss": 0.3073,
1814
+ "step": 90500
1815
+ },
1816
+ {
1817
+ "epoch": 2.12,
1818
+ "learning_rate": 0.00028640336867499143,
1819
+ "loss": 0.3063,
1820
+ "step": 91000
1821
+ },
1822
+ {
1823
+ "epoch": 2.12,
1824
+ "eval_loss": 0.28914499282836914,
1825
+ "eval_runtime": 1.4031,
1826
+ "eval_samples_per_second": 1566.538,
1827
+ "eval_steps_per_second": 24.945,
1828
+ "step": 91000
1829
+ },
1830
+ {
1831
+ "epoch": 2.13,
1832
+ "learning_rate": 0.0002861999226075728,
1833
+ "loss": 0.306,
1834
+ "step": 91500
1835
+ },
1836
+ {
1837
+ "epoch": 2.14,
1838
+ "learning_rate": 0.0002859950417588206,
1839
+ "loss": 0.3057,
1840
+ "step": 92000
1841
+ },
1842
+ {
1843
+ "epoch": 2.14,
1844
+ "eval_loss": 0.28657251596450806,
1845
+ "eval_runtime": 1.3993,
1846
+ "eval_samples_per_second": 1570.823,
1847
+ "eval_steps_per_second": 25.013,
1848
+ "step": 92000
1849
+ },
1850
+ {
1851
+ "epoch": 2.16,
1852
+ "learning_rate": 0.00028578872836927904,
1853
+ "loss": 0.3057,
1854
+ "step": 92500
1855
+ },
1856
+ {
1857
+ "epoch": 2.17,
1858
+ "learning_rate": 0.0002855809846951582,
1859
+ "loss": 0.3066,
1860
+ "step": 93000
1861
+ },
1862
+ {
1863
+ "epoch": 2.17,
1864
+ "eval_loss": 0.2893889248371124,
1865
+ "eval_runtime": 1.4241,
1866
+ "eval_samples_per_second": 1543.421,
1867
+ "eval_steps_per_second": 24.577,
1868
+ "step": 93000
1869
+ },
1870
+ {
1871
+ "epoch": 2.18,
1872
+ "learning_rate": 0.00028537181300830963,
1873
+ "loss": 0.305,
1874
+ "step": 93500
1875
+ },
1876
+ {
1877
+ "epoch": 2.19,
1878
+ "learning_rate": 0.0002851612155962014,
1879
+ "loss": 0.3048,
1880
+ "step": 94000
1881
+ },
1882
+ {
1883
+ "epoch": 2.19,
1884
+ "eval_loss": 0.2858821451663971,
1885
+ "eval_runtime": 1.4056,
1886
+ "eval_samples_per_second": 1563.797,
1887
+ "eval_steps_per_second": 24.901,
1888
+ "step": 94000
1889
+ },
1890
+ {
1891
+ "epoch": 2.2,
1892
+ "learning_rate": 0.0002849491947618932,
1893
+ "loss": 0.3048,
1894
+ "step": 94500
1895
+ },
1896
+ {
1897
+ "epoch": 2.21,
1898
+ "learning_rate": 0.0002847357528240107,
1899
+ "loss": 0.3051,
1900
+ "step": 95000
1901
+ },
1902
+ {
1903
+ "epoch": 2.21,
1904
+ "eval_loss": 0.28810954093933105,
1905
+ "eval_runtime": 1.4039,
1906
+ "eval_samples_per_second": 1565.629,
1907
+ "eval_steps_per_second": 24.93,
1908
+ "step": 95000
1909
+ },
1910
+ {
1911
+ "epoch": 2.23,
1912
+ "learning_rate": 0.0002845208921167208,
1913
+ "loss": 0.3047,
1914
+ "step": 95500
1915
+ },
1916
+ {
1917
+ "epoch": 2.24,
1918
+ "learning_rate": 0.00028430461498970584,
1919
+ "loss": 0.3041,
1920
+ "step": 96000
1921
+ },
1922
+ {
1923
+ "epoch": 2.24,
1924
+ "eval_loss": 0.28588977456092834,
1925
+ "eval_runtime": 1.4454,
1926
+ "eval_samples_per_second": 1520.728,
1927
+ "eval_steps_per_second": 24.215,
1928
+ "step": 96000
1929
+ },
1930
+ {
1931
+ "epoch": 2.25,
1932
+ "learning_rate": 0.00028408692380813775,
1933
+ "loss": 0.3042,
1934
+ "step": 96500
1935
+ },
1936
+ {
1937
+ "epoch": 2.26,
1938
+ "learning_rate": 0.00028386782095265247,
1939
+ "loss": 0.3039,
1940
+ "step": 97000
1941
+ },
1942
+ {
1943
+ "epoch": 2.26,
1944
+ "eval_loss": 0.2837139666080475,
1945
+ "eval_runtime": 1.414,
1946
+ "eval_samples_per_second": 1554.434,
1947
+ "eval_steps_per_second": 24.752,
1948
+ "step": 97000
1949
+ },
1950
+ {
1951
+ "epoch": 2.27,
1952
+ "learning_rate": 0.0002836473088193237,
1953
+ "loss": 0.3036,
1954
+ "step": 97500
1955
+ },
1956
+ {
1957
+ "epoch": 2.28,
1958
+ "learning_rate": 0.00028342538981963677,
1959
+ "loss": 0.3032,
1960
+ "step": 98000
1961
+ },
1962
+ {
1963
+ "epoch": 2.28,
1964
+ "eval_loss": 0.28603771328926086,
1965
+ "eval_runtime": 1.4079,
1966
+ "eval_samples_per_second": 1561.179,
1967
+ "eval_steps_per_second": 24.86,
1968
+ "step": 98000
1969
+ },
1970
+ {
1971
+ "epoch": 2.3,
1972
+ "learning_rate": 0.0002832020663804624,
1973
+ "loss": 0.3035,
1974
+ "step": 98500
1975
+ },
1976
+ {
1977
+ "epoch": 2.31,
1978
+ "learning_rate": 0.00028297734094402986,
1979
+ "loss": 0.3039,
1980
+ "step": 99000
1981
+ },
1982
+ {
1983
+ "epoch": 2.31,
1984
+ "eval_loss": 0.2857549488544464,
1985
+ "eval_runtime": 1.4042,
1986
+ "eval_samples_per_second": 1565.268,
1987
+ "eval_steps_per_second": 24.925,
1988
+ "step": 99000
1989
+ },
1990
+ {
1991
+ "epoch": 2.32,
1992
+ "learning_rate": 0.0002827512159679005,
1993
+ "loss": 0.3027,
1994
+ "step": 99500
1995
+ },
1996
+ {
1997
+ "epoch": 2.33,
1998
+ "learning_rate": 0.00028252369392494086,
1999
+ "loss": 0.3025,
2000
+ "step": 100000
2001
+ },
2002
+ {
2003
+ "epoch": 2.33,
2004
+ "eval_loss": 0.28487083315849304,
2005
+ "eval_runtime": 1.4056,
2006
+ "eval_samples_per_second": 1563.737,
2007
+ "eval_steps_per_second": 24.9,
2008
+ "step": 100000
2009
  }
2010
  ],
2011
  "max_steps": 500000,
2012
  "num_train_epochs": 12,
2013
+ "total_flos": 3.1948663937631865e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac7a8dbd06b2428a64e1f1bb1794f8399c9be17660f04561ced3ff300c0d49c2
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9743dfff2c56ecc42fa49782ae324bef5653923fd6c7bcd04473781c4c5cb59f
3
  size 102501541