plip commited on
Commit
423ce3d
1 Parent(s): b4875aa

Training in progress, step 450000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d13dee40bf739e07daa5e776dbbdc46c2654fbf14746060e2ac946fb44dbe8e5
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c75b6a012ab82fb16d27738e0a95c3b1b8ef7e5d1f6ee0116a8c50549bb847f8
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61e3683717b89ff6ecb486f42da196e91a7d9315fab8b8963d03c3872a54d691
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b24f59351cac08b9824b3f79acd0f1dc0a89b48a8f25dc820166f982b53fa7
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0e6291f372fe9f86b6a4068760fc679c69ce294ee69065c8ff7ec587e652c0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1894f32b2441ea8820978bbb44f8f2d9ce0a579e669301efbd1655378591798a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.251869801253523,
5
- "global_step": 440000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8806,11 +8806,211 @@
8806
  "eval_samples_per_second": 1530.896,
8807
  "eval_steps_per_second": 24.377,
8808
  "step": 440000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8809
  }
8810
  ],
8811
  "max_steps": 500000,
8812
  "num_train_epochs": 12,
8813
- "total_flos": 1.4057409136168932e+22,
8814
  "trial_name": null,
8815
  "trial_params": null
8816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.484866842191105,
5
+ "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8806
  "eval_samples_per_second": 1530.896,
8807
  "eval_steps_per_second": 24.377,
8808
  "step": 440000
8809
+ },
8810
+ {
8811
+ "epoch": 10.26,
8812
+ "learning_rate": 2.1083388335824145e-05,
8813
+ "loss": 0.2526,
8814
+ "step": 440500
8815
+ },
8816
+ {
8817
+ "epoch": 10.28,
8818
+ "learning_rate": 2.0900255633978873e-05,
8819
+ "loss": 0.2525,
8820
+ "step": 441000
8821
+ },
8822
+ {
8823
+ "epoch": 10.28,
8824
+ "eval_loss": 0.23552300035953522,
8825
+ "eval_runtime": 1.4323,
8826
+ "eval_samples_per_second": 1534.608,
8827
+ "eval_steps_per_second": 24.436,
8828
+ "step": 441000
8829
+ },
8830
+ {
8831
+ "epoch": 10.29,
8832
+ "learning_rate": 2.0718589425453314e-05,
8833
+ "loss": 0.2527,
8834
+ "step": 441500
8835
+ },
8836
+ {
8837
+ "epoch": 10.3,
8838
+ "learning_rate": 2.0538391696920015e-05,
8839
+ "loss": 0.2521,
8840
+ "step": 442000
8841
+ },
8842
+ {
8843
+ "epoch": 10.3,
8844
+ "eval_loss": 0.23650765419006348,
8845
+ "eval_runtime": 1.4235,
8846
+ "eval_samples_per_second": 1544.133,
8847
+ "eval_steps_per_second": 24.588,
8848
+ "step": 442000
8849
+ },
8850
+ {
8851
+ "epoch": 10.31,
8852
+ "learning_rate": 2.035966441899249e-05,
8853
+ "loss": 0.2523,
8854
+ "step": 442500
8855
+ },
8856
+ {
8857
+ "epoch": 10.32,
8858
+ "learning_rate": 2.0182409546203555e-05,
8859
+ "loss": 0.2523,
8860
+ "step": 443000
8861
+ },
8862
+ {
8863
+ "epoch": 10.32,
8864
+ "eval_loss": 0.23561330139636993,
8865
+ "eval_runtime": 1.4199,
8866
+ "eval_samples_per_second": 1548.043,
8867
+ "eval_steps_per_second": 24.65,
8868
+ "step": 443000
8869
+ },
8870
+ {
8871
+ "epoch": 10.33,
8872
+ "learning_rate": 2.000662901698415e-05,
8873
+ "loss": 0.2521,
8874
+ "step": 443500
8875
+ },
8876
+ {
8877
+ "epoch": 10.35,
8878
+ "learning_rate": 1.983232475364195e-05,
8879
+ "loss": 0.2523,
8880
+ "step": 444000
8881
+ },
8882
+ {
8883
+ "epoch": 10.35,
8884
+ "eval_loss": 0.23662236332893372,
8885
+ "eval_runtime": 1.4145,
8886
+ "eval_samples_per_second": 1553.873,
8887
+ "eval_steps_per_second": 24.743,
8888
+ "step": 444000
8889
+ },
8890
+ {
8891
+ "epoch": 10.36,
8892
+ "learning_rate": 1.9659498662340474e-05,
8893
+ "loss": 0.2524,
8894
+ "step": 444500
8895
+ },
8896
+ {
8897
+ "epoch": 10.37,
8898
+ "learning_rate": 1.948815263307819e-05,
8899
+ "loss": 0.2519,
8900
+ "step": 445000
8901
+ },
8902
+ {
8903
+ "epoch": 10.37,
8904
+ "eval_loss": 0.23521380126476288,
8905
+ "eval_runtime": 1.4054,
8906
+ "eval_samples_per_second": 1563.94,
8907
+ "eval_steps_per_second": 24.903,
8908
+ "step": 445000
8909
+ },
8910
+ {
8911
+ "epoch": 10.38,
8912
+ "learning_rate": 1.9318288539667765e-05,
8913
+ "loss": 0.2524,
8914
+ "step": 445500
8915
+ },
8916
+ {
8917
+ "epoch": 10.39,
8918
+ "learning_rate": 1.914990823971574e-05,
8919
+ "loss": 0.2523,
8920
+ "step": 446000
8921
+ },
8922
+ {
8923
+ "epoch": 10.39,
8924
+ "eval_loss": 0.2357960045337677,
8925
+ "eval_runtime": 1.4368,
8926
+ "eval_samples_per_second": 1529.784,
8927
+ "eval_steps_per_second": 24.36,
8928
+ "step": 446000
8929
+ },
8930
+ {
8931
+ "epoch": 10.4,
8932
+ "learning_rate": 1.8983013574602096e-05,
8933
+ "loss": 0.2521,
8934
+ "step": 446500
8935
+ },
8936
+ {
8937
+ "epoch": 10.41,
8938
+ "learning_rate": 1.8817606369460156e-05,
8939
+ "loss": 0.2521,
8940
+ "step": 447000
8941
+ },
8942
+ {
8943
+ "epoch": 10.41,
8944
+ "eval_loss": 0.23412732779979706,
8945
+ "eval_runtime": 1.4282,
8946
+ "eval_samples_per_second": 1538.96,
8947
+ "eval_steps_per_second": 24.506,
8948
+ "step": 447000
8949
+ },
8950
+ {
8951
+ "epoch": 10.43,
8952
+ "learning_rate": 1.865368843315663e-05,
8953
+ "loss": 0.252,
8954
+ "step": 447500
8955
+ },
8956
+ {
8957
+ "epoch": 10.44,
8958
+ "learning_rate": 1.8491261558271762e-05,
8959
+ "loss": 0.2519,
8960
+ "step": 448000
8961
+ },
8962
+ {
8963
+ "epoch": 10.44,
8964
+ "eval_loss": 0.23749284446239471,
8965
+ "eval_runtime": 1.4343,
8966
+ "eval_samples_per_second": 1532.461,
8967
+ "eval_steps_per_second": 24.402,
8968
+ "step": 448000
8969
+ },
8970
+ {
8971
+ "epoch": 10.45,
8972
+ "learning_rate": 1.833032752107986e-05,
8973
+ "loss": 0.2518,
8974
+ "step": 448500
8975
+ },
8976
+ {
8977
+ "epoch": 10.46,
8978
+ "learning_rate": 1.817088808152978e-05,
8979
+ "loss": 0.2519,
8980
+ "step": 449000
8981
+ },
8982
+ {
8983
+ "epoch": 10.46,
8984
+ "eval_loss": 0.2349672168493271,
8985
+ "eval_runtime": 1.4127,
8986
+ "eval_samples_per_second": 1555.927,
8987
+ "eval_steps_per_second": 24.776,
8988
+ "step": 449000
8989
+ },
8990
+ {
8991
+ "epoch": 10.47,
8992
+ "learning_rate": 1.801294498322569e-05,
8993
+ "loss": 0.2517,
8994
+ "step": 449500
8995
+ },
8996
+ {
8997
+ "epoch": 10.48,
8998
+ "learning_rate": 1.7856499953407978e-05,
8999
+ "loss": 0.2519,
9000
+ "step": 450000
9001
+ },
9002
+ {
9003
+ "epoch": 10.48,
9004
+ "eval_loss": 0.2356027066707611,
9005
+ "eval_runtime": 1.4379,
9006
+ "eval_samples_per_second": 1528.604,
9007
+ "eval_steps_per_second": 24.341,
9008
+ "step": 450000
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 12,
9013
+ "total_flos": 1.4376896274943432e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61e3683717b89ff6ecb486f42da196e91a7d9315fab8b8963d03c3872a54d691
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b24f59351cac08b9824b3f79acd0f1dc0a89b48a8f25dc820166f982b53fa7
3
  size 102501541