plip commited on
Commit
4e46033
·
1 Parent(s): b0f41f5

Training in progress, step 400000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3ec63277b76b09811bbdd2d480a7f996b4a3ffed8bebe328de7594fd1352f61
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b700ebc6fc16103b0c351577b0fe7b7a9955520a42d8ff4c426cb4c02c884230
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2b105f9c88c11eebbcdaeb34a846701e3b5103c2b4d7fbe6a23d3a666e3272c
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6331caf84d401728c3e2540374852d6e489eae28c3099656d3c86fe297980cc4
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864647684ab3694f7aa2a258c1806e10c4abf99f67ed5e54443050e485ac9436
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba83cbca80cb672828600b248dd69c4c050beb355cdcf7faf0b56212421edca
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.086884596565623,
5
- "global_step": 390000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7806,11 +7806,211 @@
7806
  "eval_samples_per_second": 1489.497,
7807
  "eval_steps_per_second": 23.718,
7808
  "step": 390000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7809
  }
7810
  ],
7811
  "max_steps": 500000,
7812
  "num_train_epochs": 12,
7813
- "total_flos": 1.245997593928734e+22,
7814
  "trial_name": null,
7815
  "trial_params": null
7816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.319881637503205,
5
+ "global_step": 400000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7806
  "eval_samples_per_second": 1489.497,
7807
  "eval_steps_per_second": 23.718,
7808
  "step": 390000
7809
+ },
7810
+ {
7811
+ "epoch": 9.1,
7812
+ "learning_rate": 4.639259509788768e-05,
7813
+ "loss": 0.2564,
7814
+ "step": 390500
7815
+ },
7816
+ {
7817
+ "epoch": 9.11,
7818
+ "learning_rate": 4.60754920716572e-05,
7819
+ "loss": 0.2566,
7820
+ "step": 391000
7821
+ },
7822
+ {
7823
+ "epoch": 9.11,
7824
+ "eval_loss": 0.23735950887203217,
7825
+ "eval_runtime": 1.4271,
7826
+ "eval_samples_per_second": 1540.156,
7827
+ "eval_steps_per_second": 24.525,
7828
+ "step": 391000
7829
+ },
7830
+ {
7831
+ "epoch": 9.12,
7832
+ "learning_rate": 4.5759580226394167e-05,
7833
+ "loss": 0.2566,
7834
+ "step": 391500
7835
+ },
7836
+ {
7837
+ "epoch": 9.13,
7838
+ "learning_rate": 4.544486301685993e-05,
7839
+ "loss": 0.2564,
7840
+ "step": 392000
7841
+ },
7842
+ {
7843
+ "epoch": 9.13,
7844
+ "eval_loss": 0.2416989952325821,
7845
+ "eval_runtime": 1.449,
7846
+ "eval_samples_per_second": 1516.918,
7847
+ "eval_steps_per_second": 24.155,
7848
+ "step": 392000
7849
+ },
7850
+ {
7851
+ "epoch": 9.15,
7852
+ "learning_rate": 4.5131343884751484e-05,
7853
+ "loss": 0.2562,
7854
+ "step": 392500
7855
+ },
7856
+ {
7857
+ "epoch": 9.16,
7858
+ "learning_rate": 4.4819026258663774e-05,
7859
+ "loss": 0.2565,
7860
+ "step": 393000
7861
+ },
7862
+ {
7863
+ "epoch": 9.16,
7864
+ "eval_loss": 0.24084173142910004,
7865
+ "eval_runtime": 1.456,
7866
+ "eval_samples_per_second": 1509.619,
7867
+ "eval_steps_per_second": 24.039,
7868
+ "step": 393000
7869
+ },
7870
+ {
7871
+ "epoch": 9.17,
7872
+ "learning_rate": 4.450791355405234e-05,
7873
+ "loss": 0.2565,
7874
+ "step": 393500
7875
+ },
7876
+ {
7877
+ "epoch": 9.18,
7878
+ "learning_rate": 4.419800917319588e-05,
7879
+ "loss": 0.2566,
7880
+ "step": 394000
7881
+ },
7882
+ {
7883
+ "epoch": 9.18,
7884
+ "eval_loss": 0.23908619582653046,
7885
+ "eval_runtime": 1.4522,
7886
+ "eval_samples_per_second": 1513.612,
7887
+ "eval_steps_per_second": 24.102,
7888
+ "step": 394000
7889
+ },
7890
+ {
7891
+ "epoch": 9.19,
7892
+ "learning_rate": 4.3889316505159056e-05,
7893
+ "loss": 0.2564,
7894
+ "step": 394500
7895
+ },
7896
+ {
7897
+ "epoch": 9.2,
7898
+ "learning_rate": 4.3581838925755465e-05,
7899
+ "loss": 0.2561,
7900
+ "step": 395000
7901
+ },
7902
+ {
7903
+ "epoch": 9.2,
7904
+ "eval_loss": 0.23751436173915863,
7905
+ "eval_runtime": 1.4847,
7906
+ "eval_samples_per_second": 1480.475,
7907
+ "eval_steps_per_second": 23.574,
7908
+ "step": 395000
7909
+ },
7910
+ {
7911
+ "epoch": 9.22,
7912
+ "learning_rate": 4.327557979751057e-05,
7913
+ "loss": 0.2562,
7914
+ "step": 395500
7915
+ },
7916
+ {
7917
+ "epoch": 9.23,
7918
+ "learning_rate": 4.297054246962517e-05,
7919
+ "loss": 0.2562,
7920
+ "step": 396000
7921
+ },
7922
+ {
7923
+ "epoch": 9.23,
7924
+ "eval_loss": 0.23929938673973083,
7925
+ "eval_runtime": 1.4334,
7926
+ "eval_samples_per_second": 1533.416,
7927
+ "eval_steps_per_second": 24.417,
7928
+ "step": 396000
7929
+ },
7930
+ {
7931
+ "epoch": 9.24,
7932
+ "learning_rate": 4.266673027793864e-05,
7933
+ "loss": 0.2562,
7934
+ "step": 396500
7935
+ },
7936
+ {
7937
+ "epoch": 9.25,
7938
+ "learning_rate": 4.236414654489242e-05,
7939
+ "loss": 0.256,
7940
+ "step": 397000
7941
+ },
7942
+ {
7943
+ "epoch": 9.25,
7944
+ "eval_loss": 0.24186982214450836,
7945
+ "eval_runtime": 1.4512,
7946
+ "eval_samples_per_second": 1514.618,
7947
+ "eval_steps_per_second": 24.118,
7948
+ "step": 397000
7949
+ },
7950
+ {
7951
+ "epoch": 9.26,
7952
+ "learning_rate": 4.206279457949371e-05,
7953
+ "loss": 0.2558,
7954
+ "step": 397500
7955
+ },
7956
+ {
7957
+ "epoch": 9.27,
7958
+ "learning_rate": 4.1762677677279335e-05,
7959
+ "loss": 0.2561,
7960
+ "step": 398000
7961
+ },
7962
+ {
7963
+ "epoch": 9.27,
7964
+ "eval_loss": 0.24197684228420258,
7965
+ "eval_runtime": 1.4439,
7966
+ "eval_samples_per_second": 1522.288,
7967
+ "eval_steps_per_second": 24.24,
7968
+ "step": 398000
7969
+ },
7970
+ {
7971
+ "epoch": 9.28,
7972
+ "learning_rate": 4.146379912027964e-05,
7973
+ "loss": 0.2561,
7974
+ "step": 398500
7975
+ },
7976
+ {
7977
+ "epoch": 9.3,
7978
+ "learning_rate": 4.1166162176982664e-05,
7979
+ "loss": 0.2554,
7980
+ "step": 399000
7981
+ },
7982
+ {
7983
+ "epoch": 9.3,
7984
+ "eval_loss": 0.24182839691638947,
7985
+ "eval_runtime": 1.4359,
7986
+ "eval_samples_per_second": 1530.716,
7987
+ "eval_steps_per_second": 24.374,
7988
+ "step": 399000
7989
+ },
7990
+ {
7991
+ "epoch": 9.31,
7992
+ "learning_rate": 4.086977010229838e-05,
7993
+ "loss": 0.2557,
7994
+ "step": 399500
7995
+ },
7996
+ {
7997
+ "epoch": 9.32,
7998
+ "learning_rate": 4.057462613752294e-05,
7999
+ "loss": 0.2559,
8000
+ "step": 400000
8001
+ },
8002
+ {
8003
+ "epoch": 9.32,
8004
+ "eval_loss": 0.23793195188045502,
8005
+ "eval_runtime": 1.413,
8006
+ "eval_samples_per_second": 1555.592,
8007
+ "eval_steps_per_second": 24.771,
8008
+ "step": 400000
8009
  }
8010
  ],
8011
  "max_steps": 500000,
8012
  "num_train_epochs": 12,
8013
+ "total_flos": 1.277946307806184e+22,
8014
  "trial_name": null,
8015
  "trial_params": null
8016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2b105f9c88c11eebbcdaeb34a846701e3b5103c2b4d7fbe6a23d3a666e3272c
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6331caf84d401728c3e2540374852d6e489eae28c3099656d3c86fe297980cc4
3
  size 102501541