plip committed on
Commit
8964dbe
·
1 Parent(s): a21d85d

Training in progress, step 200000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd33a1577954e215c8437e7980add2f5f9574d602de488a6b0720d3ad4458fca
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeed81ff957b2db542c361adc3c37054999526f67b2fdd08124045a7a9910110
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a460f3fbdd157653313f970d730a974ff0dafa62f6d47ebc7be418c29489b80d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1e1eb146f1c0d6bafed7b37d2555b3cf3a9a1cec24e6e7b6230f23f01cccd4
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e960d152fa8a2009412029824cd0038b06b011605abe65cb9b329a00e7e113e2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb751814ba6ecdcd5c1c820cf948dc125ec28136a0f7891aedc4ea8d19c01bac
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0439027dd6f55adc764cb1317d963df8ccf36442066dafecce10b3f538efa8e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e461416c63c82734faef19b4949af829ad430bff342d30400b1c1da0cafb58f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.426943777814022,
5
- "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3806,11 +3806,211 @@
3806
  "eval_samples_per_second": 1514.384,
3807
  "eval_steps_per_second": 24.114,
3808
  "step": 190000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3809
  }
3810
  ],
3811
  "max_steps": 500000,
3812
  "num_train_epochs": 12,
3813
- "total_flos": 6.070245648751873e+21,
3814
  "trial_name": null,
3815
  "trial_params": null
3816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.659940818751602,
5
+ "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3806
  "eval_samples_per_second": 1514.384,
3807
  "eval_steps_per_second": 24.114,
3808
  "step": 190000
3809
+ },
3810
+ {
3811
+ "epoch": 4.44,
3812
+ "learning_rate": 0.00022146867474920118,
3813
+ "loss": 0.2828,
3814
+ "step": 190500
3815
+ },
3816
+ {
3817
+ "epoch": 4.45,
3818
+ "learning_rate": 0.00022104215397571484,
3819
+ "loss": 0.281,
3820
+ "step": 191000
3821
+ },
3822
+ {
3823
+ "epoch": 4.45,
3824
+ "eval_loss": 0.2647937834262848,
3825
+ "eval_runtime": 1.4216,
3826
+ "eval_samples_per_second": 1546.121,
3827
+ "eval_steps_per_second": 24.62,
3828
+ "step": 191000
3829
+ },
3830
+ {
3831
+ "epoch": 4.46,
3832
+ "learning_rate": 0.0002206149109758135,
3833
+ "loss": 0.2816,
3834
+ "step": 191500
3835
+ },
3836
+ {
3837
+ "epoch": 4.47,
3838
+ "learning_rate": 0.00022018695042175818,
3839
+ "loss": 0.2812,
3840
+ "step": 192000
3841
+ },
3842
+ {
3843
+ "epoch": 4.47,
3844
+ "eval_loss": 0.2635156512260437,
3845
+ "eval_runtime": 1.4081,
3846
+ "eval_samples_per_second": 1560.959,
3847
+ "eval_steps_per_second": 24.856,
3848
+ "step": 192000
3849
+ },
3850
+ {
3851
+ "epoch": 4.49,
3852
+ "learning_rate": 0.00021975827699365693,
3853
+ "loss": 0.2807,
3854
+ "step": 192500
3855
+ },
3856
+ {
3857
+ "epoch": 4.5,
3858
+ "learning_rate": 0.00021932889537941365,
3859
+ "loss": 0.2809,
3860
+ "step": 193000
3861
+ },
3862
+ {
3863
+ "epoch": 4.5,
3864
+ "eval_loss": 0.2628609240055084,
3865
+ "eval_runtime": 1.4036,
3866
+ "eval_samples_per_second": 1565.967,
3867
+ "eval_steps_per_second": 24.936,
3868
+ "step": 193000
3869
+ },
3870
+ {
3871
+ "epoch": 4.51,
3872
+ "learning_rate": 0.0002188988102746769,
3873
+ "loss": 0.2806,
3874
+ "step": 193500
3875
+ },
3876
+ {
3877
+ "epoch": 4.52,
3878
+ "learning_rate": 0.0002184680263827885,
3879
+ "loss": 0.2809,
3880
+ "step": 194000
3881
+ },
3882
+ {
3883
+ "epoch": 4.52,
3884
+ "eval_loss": 0.26426324248313904,
3885
+ "eval_runtime": 1.4613,
3886
+ "eval_samples_per_second": 1504.163,
3887
+ "eval_steps_per_second": 23.952,
3888
+ "step": 194000
3889
+ },
3890
+ {
3891
+ "epoch": 4.53,
3892
+ "learning_rate": 0.00021803654841473204,
3893
+ "loss": 0.2805,
3894
+ "step": 194500
3895
+ },
3896
+ {
3897
+ "epoch": 4.54,
3898
+ "learning_rate": 0.00021760438108908142,
3899
+ "loss": 0.2805,
3900
+ "step": 195000
3901
+ },
3902
+ {
3903
+ "epoch": 4.54,
3904
+ "eval_loss": 0.2624601125717163,
3905
+ "eval_runtime": 1.4471,
3906
+ "eval_samples_per_second": 1518.947,
3907
+ "eval_steps_per_second": 24.187,
3908
+ "step": 195000
3909
+ },
3910
+ {
3911
+ "epoch": 4.56,
3912
+ "learning_rate": 0.0002171715291319494,
3913
+ "loss": 0.2809,
3914
+ "step": 195500
3915
+ },
3916
+ {
3917
+ "epoch": 4.57,
3918
+ "learning_rate": 0.0002167379972769355,
3919
+ "loss": 0.2806,
3920
+ "step": 196000
3921
+ },
3922
+ {
3923
+ "epoch": 4.57,
3924
+ "eval_loss": 0.26392462849617004,
3925
+ "eval_runtime": 1.3978,
3926
+ "eval_samples_per_second": 1572.46,
3927
+ "eval_steps_per_second": 25.039,
3928
+ "step": 196000
3929
+ },
3930
+ {
3931
+ "epoch": 4.58,
3932
+ "learning_rate": 0.0002163037902650747,
3933
+ "loss": 0.2801,
3934
+ "step": 196500
3935
+ },
3936
+ {
3937
+ "epoch": 4.59,
3938
+ "learning_rate": 0.0002158689128447853,
3939
+ "loss": 0.2805,
3940
+ "step": 197000
3941
+ },
3942
+ {
3943
+ "epoch": 4.59,
3944
+ "eval_loss": 0.262777715921402,
3945
+ "eval_runtime": 1.4828,
3946
+ "eval_samples_per_second": 1482.293,
3947
+ "eval_steps_per_second": 23.603,
3948
+ "step": 197000
3949
+ },
3950
+ {
3951
+ "epoch": 4.6,
3952
+ "learning_rate": 0.00021543336977181704,
3953
+ "loss": 0.2805,
3954
+ "step": 197500
3955
+ },
3956
+ {
3957
+ "epoch": 4.61,
3958
+ "learning_rate": 0.00021499716580919933,
3959
+ "loss": 0.2799,
3960
+ "step": 198000
3961
+ },
3962
+ {
3963
+ "epoch": 4.61,
3964
+ "eval_loss": 0.26294538378715515,
3965
+ "eval_runtime": 1.4108,
3966
+ "eval_samples_per_second": 1557.962,
3967
+ "eval_steps_per_second": 24.808,
3968
+ "step": 198000
3969
+ },
3970
+ {
3971
+ "epoch": 4.62,
3972
+ "learning_rate": 0.00021456030572718866,
3973
+ "loss": 0.2797,
3974
+ "step": 198500
3975
+ },
3976
+ {
3977
+ "epoch": 4.64,
3978
+ "learning_rate": 0.000214122794303217,
3979
+ "loss": 0.2802,
3980
+ "step": 199000
3981
+ },
3982
+ {
3983
+ "epoch": 4.64,
3984
+ "eval_loss": 0.2624069154262543,
3985
+ "eval_runtime": 1.4063,
3986
+ "eval_samples_per_second": 1562.932,
3987
+ "eval_steps_per_second": 24.887,
3988
+ "step": 199000
3989
+ },
3990
+ {
3991
+ "epoch": 4.65,
3992
+ "learning_rate": 0.00021368463632183912,
3993
+ "loss": 0.2799,
3994
+ "step": 199500
3995
+ },
3996
+ {
3997
+ "epoch": 4.66,
3998
+ "learning_rate": 0.00021324583657468055,
3999
+ "loss": 0.2799,
4000
+ "step": 200000
4001
+ },
4002
+ {
4003
+ "epoch": 4.66,
4004
+ "eval_loss": 0.2646183967590332,
4005
+ "eval_runtime": 1.4232,
4006
+ "eval_samples_per_second": 1544.399,
4007
+ "eval_steps_per_second": 24.592,
4008
+ "step": 200000
4009
  }
4010
  ],
4011
  "max_steps": 500000,
4012
  "num_train_epochs": 12,
4013
+ "total_flos": 6.389732787526373e+21,
4014
  "trial_name": null,
4015
  "trial_params": null
4016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a460f3fbdd157653313f970d730a974ff0dafa62f6d47ebc7be418c29489b80d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1e1eb146f1c0d6bafed7b37d2555b3cf3a9a1cec24e6e7b6230f23f01cccd4
3
  size 102501541