joelniklaus commited on
Commit
be74cf2
1 Parent(s): 7463b1b

Training in progress, step 650000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28ee67af53aba46278b2f67f403ff852d746365440eecdf8bce9fcd80cd0bb83
3
  size 3480942553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80276c82f803233f5fdb661f073deaa493a9da9622e517afb26a84ad5e426889
3
  size 3480942553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf3a2440240f6a8a3e9f4556d4121d36465b190a0240767d5fa77daee75c8504
3
  size 1740493675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:010ff15e7c5b4944d147f08e3ca39a7a13d144f0ac2b46d7be37da6a2832f71d
3
  size 1740493675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f595742cd0d96240559aaf1ff72fa8686f62da9f07c5878ab2af30ab1e4f0a07
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d7fa20411577666fac76fe76348b4f9231439cc2e524d6e3185910c258591e9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6,
5
- "global_step": 600000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3702,11 +3702,319 @@
3702
  "eval_samples_per_second": 28.153,
3703
  "eval_steps_per_second": 0.445,
3704
  "step": 600000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3705
  }
3706
  ],
3707
  "max_steps": 1000000,
3708
  "num_train_epochs": 9223372036854775807,
3709
- "total_flos": 3.58012636102656e+19,
3710
  "trial_name": null,
3711
  "trial_params": null
3712
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.65,
5
+ "global_step": 650000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3702
  "eval_samples_per_second": 28.153,
3703
  "eval_steps_per_second": 0.445,
3704
  "step": 600000
3705
+ },
3706
+ {
3707
+ "epoch": 0.6,
3708
+ "learning_rate": 3.756550564175727e-05,
3709
+ "loss": 0.7762,
3710
+ "step": 601000
3711
+ },
3712
+ {
3713
+ "epoch": 0.6,
3714
+ "learning_rate": 3.74054216221926e-05,
3715
+ "loss": 0.7725,
3716
+ "step": 602000
3717
+ },
3718
+ {
3719
+ "epoch": 0.6,
3720
+ "learning_rate": 3.7245475334919246e-05,
3721
+ "loss": 0.754,
3722
+ "step": 603000
3723
+ },
3724
+ {
3725
+ "epoch": 0.6,
3726
+ "learning_rate": 3.7085668529084184e-05,
3727
+ "loss": 0.7497,
3728
+ "step": 604000
3729
+ },
3730
+ {
3731
+ "epoch": 0.6,
3732
+ "learning_rate": 3.6926002952309016e-05,
3733
+ "loss": 0.7399,
3734
+ "step": 605000
3735
+ },
3736
+ {
3737
+ "epoch": 0.61,
3738
+ "learning_rate": 3.676648035067093e-05,
3739
+ "loss": 0.7482,
3740
+ "step": 606000
3741
+ },
3742
+ {
3743
+ "epoch": 0.61,
3744
+ "learning_rate": 3.6607102468683526e-05,
3745
+ "loss": 0.7521,
3746
+ "step": 607000
3747
+ },
3748
+ {
3749
+ "epoch": 0.61,
3750
+ "learning_rate": 3.6447871049277796e-05,
3751
+ "loss": 0.7565,
3752
+ "step": 608000
3753
+ },
3754
+ {
3755
+ "epoch": 0.61,
3756
+ "learning_rate": 3.628878783378302e-05,
3757
+ "loss": 0.7883,
3758
+ "step": 609000
3759
+ },
3760
+ {
3761
+ "epoch": 0.61,
3762
+ "learning_rate": 3.612985456190778e-05,
3763
+ "loss": 0.7827,
3764
+ "step": 610000
3765
+ },
3766
+ {
3767
+ "epoch": 0.61,
3768
+ "learning_rate": 3.597107297172084e-05,
3769
+ "loss": 0.7823,
3770
+ "step": 611000
3771
+ },
3772
+ {
3773
+ "epoch": 0.61,
3774
+ "learning_rate": 3.581244479963225e-05,
3775
+ "loss": 0.783,
3776
+ "step": 612000
3777
+ },
3778
+ {
3779
+ "epoch": 0.61,
3780
+ "learning_rate": 3.5653971780374295e-05,
3781
+ "loss": 0.778,
3782
+ "step": 613000
3783
+ },
3784
+ {
3785
+ "epoch": 0.61,
3786
+ "learning_rate": 3.5495655646982505e-05,
3787
+ "loss": 0.7877,
3788
+ "step": 614000
3789
+ },
3790
+ {
3791
+ "epoch": 0.61,
3792
+ "learning_rate": 3.533749813077677e-05,
3793
+ "loss": 0.7903,
3794
+ "step": 615000
3795
+ },
3796
+ {
3797
+ "epoch": 0.62,
3798
+ "learning_rate": 3.517950096134232e-05,
3799
+ "loss": 0.794,
3800
+ "step": 616000
3801
+ },
3802
+ {
3803
+ "epoch": 0.62,
3804
+ "learning_rate": 3.5021665866510925e-05,
3805
+ "loss": 0.7947,
3806
+ "step": 617000
3807
+ },
3808
+ {
3809
+ "epoch": 0.62,
3810
+ "learning_rate": 3.4863994572341843e-05,
3811
+ "loss": 0.7925,
3812
+ "step": 618000
3813
+ },
3814
+ {
3815
+ "epoch": 0.62,
3816
+ "learning_rate": 3.470648880310313e-05,
3817
+ "loss": 0.7852,
3818
+ "step": 619000
3819
+ },
3820
+ {
3821
+ "epoch": 0.62,
3822
+ "learning_rate": 3.4549150281252636e-05,
3823
+ "loss": 0.795,
3824
+ "step": 620000
3825
+ },
3826
+ {
3827
+ "epoch": 0.62,
3828
+ "learning_rate": 3.439198072741921e-05,
3829
+ "loss": 0.7721,
3830
+ "step": 621000
3831
+ },
3832
+ {
3833
+ "epoch": 0.62,
3834
+ "learning_rate": 3.423498186038393e-05,
3835
+ "loss": 0.7724,
3836
+ "step": 622000
3837
+ },
3838
+ {
3839
+ "epoch": 0.62,
3840
+ "learning_rate": 3.407815539706124e-05,
3841
+ "loss": 0.7693,
3842
+ "step": 623000
3843
+ },
3844
+ {
3845
+ "epoch": 0.62,
3846
+ "learning_rate": 3.392150305248024e-05,
3847
+ "loss": 0.7803,
3848
+ "step": 624000
3849
+ },
3850
+ {
3851
+ "epoch": 0.62,
3852
+ "learning_rate": 3.3765026539765834e-05,
3853
+ "loss": 0.7795,
3854
+ "step": 625000
3855
+ },
3856
+ {
3857
+ "epoch": 0.63,
3858
+ "learning_rate": 3.360872757012011e-05,
3859
+ "loss": 0.7865,
3860
+ "step": 626000
3861
+ },
3862
+ {
3863
+ "epoch": 0.63,
3864
+ "learning_rate": 3.3452607852803584e-05,
3865
+ "loss": 0.7829,
3866
+ "step": 627000
3867
+ },
3868
+ {
3869
+ "epoch": 0.63,
3870
+ "learning_rate": 3.329666909511645e-05,
3871
+ "loss": 0.7865,
3872
+ "step": 628000
3873
+ },
3874
+ {
3875
+ "epoch": 0.63,
3876
+ "learning_rate": 3.3140913002379995e-05,
3877
+ "loss": 0.7686,
3878
+ "step": 629000
3879
+ },
3880
+ {
3881
+ "epoch": 0.63,
3882
+ "learning_rate": 3.298534127791785e-05,
3883
+ "loss": 0.7874,
3884
+ "step": 630000
3885
+ },
3886
+ {
3887
+ "epoch": 0.63,
3888
+ "learning_rate": 3.282995562303754e-05,
3889
+ "loss": 0.7777,
3890
+ "step": 631000
3891
+ },
3892
+ {
3893
+ "epoch": 0.63,
3894
+ "learning_rate": 3.267475773701161e-05,
3895
+ "loss": 0.788,
3896
+ "step": 632000
3897
+ },
3898
+ {
3899
+ "epoch": 0.63,
3900
+ "learning_rate": 3.251974931705933e-05,
3901
+ "loss": 0.7724,
3902
+ "step": 633000
3903
+ },
3904
+ {
3905
+ "epoch": 0.63,
3906
+ "learning_rate": 3.236493205832795e-05,
3907
+ "loss": 0.7637,
3908
+ "step": 634000
3909
+ },
3910
+ {
3911
+ "epoch": 0.64,
3912
+ "learning_rate": 3.221030765387417e-05,
3913
+ "loss": 0.7805,
3914
+ "step": 635000
3915
+ },
3916
+ {
3917
+ "epoch": 0.64,
3918
+ "learning_rate": 3.205587779464576e-05,
3919
+ "loss": 0.7934,
3920
+ "step": 636000
3921
+ },
3922
+ {
3923
+ "epoch": 0.64,
3924
+ "learning_rate": 3.190164416946285e-05,
3925
+ "loss": 0.7694,
3926
+ "step": 637000
3927
+ },
3928
+ {
3929
+ "epoch": 0.64,
3930
+ "learning_rate": 3.1747608464999725e-05,
3931
+ "loss": 0.7593,
3932
+ "step": 638000
3933
+ },
3934
+ {
3935
+ "epoch": 0.64,
3936
+ "learning_rate": 3.1593772365766105e-05,
3937
+ "loss": 0.7654,
3938
+ "step": 639000
3939
+ },
3940
+ {
3941
+ "epoch": 0.64,
3942
+ "learning_rate": 3.144013755408895e-05,
3943
+ "loss": 0.7785,
3944
+ "step": 640000
3945
+ },
3946
+ {
3947
+ "epoch": 0.64,
3948
+ "learning_rate": 3.128670571009399e-05,
3949
+ "loss": 0.7649,
3950
+ "step": 641000
3951
+ },
3952
+ {
3953
+ "epoch": 0.64,
3954
+ "learning_rate": 3.113347851168721e-05,
3955
+ "loss": 0.7757,
3956
+ "step": 642000
3957
+ },
3958
+ {
3959
+ "epoch": 0.64,
3960
+ "learning_rate": 3.098045763453678e-05,
3961
+ "loss": 0.7734,
3962
+ "step": 643000
3963
+ },
3964
+ {
3965
+ "epoch": 0.64,
3966
+ "learning_rate": 3.082764475205442e-05,
3967
+ "loss": 0.7688,
3968
+ "step": 644000
3969
+ },
3970
+ {
3971
+ "epoch": 0.65,
3972
+ "learning_rate": 3.0675041535377405e-05,
3973
+ "loss": 0.7796,
3974
+ "step": 645000
3975
+ },
3976
+ {
3977
+ "epoch": 0.65,
3978
+ "learning_rate": 3.052264965335e-05,
3979
+ "loss": 0.7823,
3980
+ "step": 646000
3981
+ },
3982
+ {
3983
+ "epoch": 0.65,
3984
+ "learning_rate": 3.0370470772505433e-05,
3985
+ "loss": 0.7946,
3986
+ "step": 647000
3987
+ },
3988
+ {
3989
+ "epoch": 0.65,
3990
+ "learning_rate": 3.0218506557047598e-05,
3991
+ "loss": 0.7799,
3992
+ "step": 648000
3993
+ },
3994
+ {
3995
+ "epoch": 0.65,
3996
+ "learning_rate": 3.006675866883275e-05,
3997
+ "loss": 0.7743,
3998
+ "step": 649000
3999
+ },
4000
+ {
4001
+ "epoch": 0.65,
4002
+ "learning_rate": 2.991522876735154e-05,
4003
+ "loss": 0.7785,
4004
+ "step": 650000
4005
+ },
4006
+ {
4007
+ "epoch": 0.65,
4008
+ "eval_loss": 0.4026853144168854,
4009
+ "eval_runtime": 170.8706,
4010
+ "eval_samples_per_second": 29.262,
4011
+ "eval_steps_per_second": 0.462,
4012
+ "step": 650000
4013
  }
4014
  ],
4015
  "max_steps": 1000000,
4016
  "num_train_epochs": 9223372036854775807,
4017
+ "total_flos": 3.87847022444544e+19,
4018
  "trial_name": null,
4019
  "trial_params": null
4020
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf3a2440240f6a8a3e9f4556d4121d36465b190a0240767d5fa77daee75c8504
3
  size 1740493675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:010ff15e7c5b4944d147f08e3ca39a7a13d144f0ac2b46d7be37da6a2832f71d
3
  size 1740493675
runs/Feb25_19-25-50_t1v-n-15e54913-w-0/events.out.tfevents.1677353360.t1v-n-15e54913-w-0.2265434.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e5e2e456e56194105e17b452fd8c58cd77d132fbe96a705b9e5b711228eb6e
3
- size 45193
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d4a585850293c2690f7fc9d332a58b9f2fd7470b43eb72996349e849315dcf
3
+ size 53469