Nadav commited on
Commit
4ff1820
·
1 Parent(s): 6c4551a

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87faff5dd87328fcefec427ea0cb702408ad590c1646c0f7d82e8de25452a283
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de948cb6685f3fec4b4469fef2216abb64c3cc5c9fb4b44411eb5b1b0c73ba4
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0479c3f6649af96b0cacdd70e8babe32246fd880992e4fd5742192a6d7c00e5d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:207b689dc20acc0d2a0e375eb166e925399ce7d85ed5eab0492acb43daf3c3d7
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ac2097dd35c516db233db35554b7dda10d7d27ccd52d7408994cce86d7c83aa
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e5e0a7fe16308908e21fec293fc6a34f87d7d9cfe41576aecb2632ff5899d99
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74798da0c97f6f81c79c3a97ed18dc37829dc36f2e8346e520721378fc2c4935
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24efe2e8f1d0c1484971647cd47d1b275b1dd217c12b20dd38b6583a6b5a8f76
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef3a8846658935c38489fb3bdfd532e16ae98f8d1a9e1c24218ae3f20a78e624
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ecaf369939dc91fa10968935b06d43415e74dfc059ada323db7a2ef509df0e1
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.080979925144607,
5
- "global_step": 95000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1906,11 +1906,111 @@
1906
  "eval_samples_per_second": 29.022,
1907
  "eval_steps_per_second": 0.929,
1908
  "step": 95000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1909
  }
1910
  ],
1911
  "max_steps": 1000000,
1912
  "num_train_epochs": 86,
1913
- "total_flos": 4.3701839562951146e+21,
1914
  "trial_name": null,
1915
  "trial_params": null
1916
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.506294658046954,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1906
  "eval_samples_per_second": 29.022,
1907
  "eval_steps_per_second": 0.929,
1908
  "step": 95000
1909
+ },
1910
+ {
1911
+ "epoch": 8.12,
1912
+ "learning_rate": 9.999999999999999e-06,
1913
+ "loss": 0.3997,
1914
+ "step": 95500
1915
+ },
1916
+ {
1917
+ "epoch": 8.17,
1918
+ "learning_rate": 9.999999999999999e-06,
1919
+ "loss": 0.4024,
1920
+ "step": 96000
1921
+ },
1922
+ {
1923
+ "epoch": 8.17,
1924
+ "eval_loss": 0.37120264768600464,
1925
+ "eval_runtime": 16.2125,
1926
+ "eval_samples_per_second": 30.84,
1927
+ "eval_steps_per_second": 0.987,
1928
+ "step": 96000
1929
+ },
1930
+ {
1931
+ "epoch": 8.21,
1932
+ "learning_rate": 9.999999999999999e-06,
1933
+ "loss": 0.3997,
1934
+ "step": 96500
1935
+ },
1936
+ {
1937
+ "epoch": 8.25,
1938
+ "learning_rate": 9.999999999999999e-06,
1939
+ "loss": 0.402,
1940
+ "step": 97000
1941
+ },
1942
+ {
1943
+ "epoch": 8.25,
1944
+ "eval_loss": 0.37261128425598145,
1945
+ "eval_runtime": 16.3463,
1946
+ "eval_samples_per_second": 30.588,
1947
+ "eval_steps_per_second": 0.979,
1948
+ "step": 97000
1949
+ },
1950
+ {
1951
+ "epoch": 8.29,
1952
+ "learning_rate": 9.999999999999999e-06,
1953
+ "loss": 0.4004,
1954
+ "step": 97500
1955
+ },
1956
+ {
1957
+ "epoch": 8.34,
1958
+ "learning_rate": 9.999999999999999e-06,
1959
+ "loss": 0.4003,
1960
+ "step": 98000
1961
+ },
1962
+ {
1963
+ "epoch": 8.34,
1964
+ "eval_loss": 0.37027257680892944,
1965
+ "eval_runtime": 20.0807,
1966
+ "eval_samples_per_second": 24.9,
1967
+ "eval_steps_per_second": 0.797,
1968
+ "step": 98000
1969
+ },
1970
+ {
1971
+ "epoch": 8.38,
1972
+ "learning_rate": 9.999999999999999e-06,
1973
+ "loss": 0.4002,
1974
+ "step": 98500
1975
+ },
1976
+ {
1977
+ "epoch": 8.42,
1978
+ "learning_rate": 9.999999999999999e-06,
1979
+ "loss": 0.4007,
1980
+ "step": 99000
1981
+ },
1982
+ {
1983
+ "epoch": 8.42,
1984
+ "eval_loss": 0.37140411138534546,
1985
+ "eval_runtime": 21.4309,
1986
+ "eval_samples_per_second": 23.331,
1987
+ "eval_steps_per_second": 0.747,
1988
+ "step": 99000
1989
+ },
1990
+ {
1991
+ "epoch": 8.46,
1992
+ "learning_rate": 9.999999999999999e-06,
1993
+ "loss": 0.4,
1994
+ "step": 99500
1995
+ },
1996
+ {
1997
+ "epoch": 8.51,
1998
+ "learning_rate": 9.999999999999999e-06,
1999
+ "loss": 0.3997,
2000
+ "step": 100000
2001
+ },
2002
+ {
2003
+ "epoch": 8.51,
2004
+ "eval_loss": 0.3693406283855438,
2005
+ "eval_runtime": 17.2208,
2006
+ "eval_samples_per_second": 29.035,
2007
+ "eval_steps_per_second": 0.929,
2008
+ "step": 100000
2009
  }
2010
  ],
2011
  "max_steps": 1000000,
2012
  "num_train_epochs": 86,
2013
+ "total_flos": 4.600200440697905e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0479c3f6649af96b0cacdd70e8babe32246fd880992e4fd5742192a6d7c00e5d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:207b689dc20acc0d2a0e375eb166e925399ce7d85ed5eab0492acb43daf3c3d7
3
  size 449471589