joelniklaus commited on
Commit
3f73ee9
1 Parent(s): d79a927

Training in progress, step 150000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0a9ceb77645caea569cffed69f290b1a62d35cb4d2d3033329212a18547546
3
  size 885325017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c485e39025b832d2bd3bd464214f232ea6b3f0a5283d0f592f5b95f7afffe93d
3
  size 885325017
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:258a403857724ef630fe48099f54db2ec14c7dae15424f2874409ee2181d7651
3
  size 442675755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bc0d6bce669ef3b9a37ef6ee5196e838e490074197c122478da0bc35a3e8702
3
  size 442675755
last-checkpoint/trainer_state.json CHANGED
@@ -638,7 +638,7 @@
638
  {
639
  "epoch": 0.1,
640
  "learning_rate": 9.923399386589933e-05,
641
- "loss": 0.6526,
642
  "step": 103000
643
  },
644
  {
@@ -650,285 +650,285 @@
650
  {
651
  "epoch": 0.1,
652
  "learning_rate": 9.917525374361912e-05,
653
- "loss": 1.0438,
654
  "step": 105000
655
  },
656
  {
657
  "epoch": 0.11,
658
  "learning_rate": 9.914507686137019e-05,
659
- "loss": 0.961,
660
  "step": 106000
661
  },
662
  {
663
  "epoch": 0.11,
664
  "learning_rate": 9.911436253643445e-05,
665
- "loss": 0.9393,
666
  "step": 107000
667
  },
668
  {
669
  "epoch": 0.11,
670
  "learning_rate": 9.90831111046988e-05,
671
- "loss": 1.0514,
672
  "step": 108000
673
  },
674
  {
675
  "epoch": 0.11,
676
  "learning_rate": 9.905132290792394e-05,
677
- "loss": 1.0211,
678
  "step": 109000
679
  },
680
  {
681
  "epoch": 0.11,
682
  "learning_rate": 9.901899829374047e-05,
683
- "loss": 1.003,
684
  "step": 110000
685
  },
686
  {
687
  "epoch": 0.11,
688
  "learning_rate": 9.89861376156452e-05,
689
- "loss": 1.0614,
690
  "step": 111000
691
  },
692
  {
693
  "epoch": 0.11,
694
  "learning_rate": 9.895274123299723e-05,
695
- "loss": 1.0745,
696
  "step": 112000
697
  },
698
  {
699
  "epoch": 0.11,
700
  "learning_rate": 9.891880951101407e-05,
701
- "loss": 1.0833,
702
  "step": 113000
703
  },
704
  {
705
  "epoch": 0.11,
706
  "learning_rate": 9.888434282076758e-05,
707
- "loss": 0.9589,
708
  "step": 114000
709
  },
710
  {
711
  "epoch": 0.12,
712
  "learning_rate": 9.884934153917997e-05,
713
- "loss": 1.0296,
714
  "step": 115000
715
  },
716
  {
717
  "epoch": 0.12,
718
  "learning_rate": 9.881380604901964e-05,
719
- "loss": 1.0161,
720
  "step": 116000
721
  },
722
  {
723
  "epoch": 0.12,
724
  "learning_rate": 9.877773673889701e-05,
725
- "loss": 0.7998,
726
  "step": 117000
727
  },
728
  {
729
  "epoch": 0.12,
730
  "learning_rate": 9.87411340032603e-05,
731
- "loss": 0.9128,
732
  "step": 118000
733
  },
734
  {
735
  "epoch": 0.12,
736
  "learning_rate": 9.870399824239117e-05,
737
- "loss": 0.9641,
738
  "step": 119000
739
  },
740
  {
741
  "epoch": 0.12,
742
  "learning_rate": 9.86663298624003e-05,
743
- "loss": 0.9109,
744
  "step": 120000
745
  },
746
  {
747
  "epoch": 0.12,
748
  "learning_rate": 9.862812927522309e-05,
749
- "loss": 1.0227,
750
  "step": 121000
751
  },
752
  {
753
  "epoch": 0.12,
754
  "learning_rate": 9.858939689861506e-05,
755
- "loss": 0.9949,
756
  "step": 122000
757
  },
758
  {
759
  "epoch": 0.12,
760
  "learning_rate": 9.855013315614725e-05,
761
- "loss": 0.9838,
762
  "step": 123000
763
  },
764
  {
765
  "epoch": 0.12,
766
  "learning_rate": 9.851033847720166e-05,
767
- "loss": 0.9925,
768
  "step": 124000
769
  },
770
  {
771
  "epoch": 0.12,
772
  "learning_rate": 9.847001329696653e-05,
773
- "loss": 0.913,
774
  "step": 125000
775
  },
776
  {
777
  "epoch": 0.13,
778
  "learning_rate": 9.842915805643155e-05,
779
- "loss": 0.9249,
780
  "step": 126000
781
  },
782
  {
783
  "epoch": 0.13,
784
  "learning_rate": 9.838777320238312e-05,
785
- "loss": 0.9684,
786
  "step": 127000
787
  },
788
  {
789
  "epoch": 0.13,
790
  "learning_rate": 9.834585918739936e-05,
791
- "loss": 1.0079,
792
  "step": 128000
793
  },
794
  {
795
  "epoch": 0.13,
796
  "learning_rate": 9.830341646984521e-05,
797
- "loss": 0.9349,
798
  "step": 129000
799
  },
800
  {
801
  "epoch": 0.13,
802
  "learning_rate": 9.826044551386744e-05,
803
- "loss": 0.9906,
804
  "step": 130000
805
  },
806
  {
807
  "epoch": 0.13,
808
  "learning_rate": 9.821694678938953e-05,
809
- "loss": 1.0286,
810
  "step": 131000
811
  },
812
  {
813
  "epoch": 0.13,
814
  "learning_rate": 9.817292077210659e-05,
815
- "loss": 1.0763,
816
  "step": 132000
817
  },
818
  {
819
  "epoch": 0.13,
820
  "learning_rate": 9.812836794348004e-05,
821
- "loss": 0.9955,
822
  "step": 133000
823
  },
824
  {
825
  "epoch": 0.13,
826
  "learning_rate": 9.808328879073251e-05,
827
- "loss": 1.0031,
828
  "step": 134000
829
  },
830
  {
831
  "epoch": 0.14,
832
  "learning_rate": 9.803768380684242e-05,
833
- "loss": 1.2198,
834
  "step": 135000
835
  },
836
  {
837
  "epoch": 0.14,
838
  "learning_rate": 9.799155349053851e-05,
839
- "loss": 1.0474,
840
  "step": 136000
841
  },
842
  {
843
  "epoch": 0.14,
844
  "learning_rate": 9.794489834629455e-05,
845
- "loss": 0.9156,
846
  "step": 137000
847
  },
848
  {
849
  "epoch": 0.14,
850
  "learning_rate": 9.789771888432375e-05,
851
- "loss": 0.9748,
852
  "step": 138000
853
  },
854
  {
855
  "epoch": 0.14,
856
  "learning_rate": 9.785001562057309e-05,
857
- "loss": 0.9482,
858
  "step": 139000
859
  },
860
  {
861
  "epoch": 0.14,
862
  "learning_rate": 9.780178907671789e-05,
863
- "loss": 0.919,
864
  "step": 140000
865
  },
866
  {
867
  "epoch": 0.14,
868
  "learning_rate": 9.775303978015585e-05,
869
- "loss": 0.9874,
870
  "step": 141000
871
  },
872
  {
873
  "epoch": 0.14,
874
  "learning_rate": 9.77037682640015e-05,
875
- "loss": 0.9887,
876
  "step": 142000
877
  },
878
  {
879
  "epoch": 0.14,
880
  "learning_rate": 9.765397506708023e-05,
881
- "loss": 0.9241,
882
  "step": 143000
883
  },
884
  {
885
  "epoch": 0.14,
886
  "learning_rate": 9.760366073392246e-05,
887
- "loss": 0.9219,
888
  "step": 144000
889
  },
890
  {
891
  "epoch": 0.14,
892
  "learning_rate": 9.755282581475769e-05,
893
- "loss": 0.9471,
894
  "step": 145000
895
  },
896
  {
897
  "epoch": 0.15,
898
  "learning_rate": 9.750147086550844e-05,
899
- "loss": 0.7892,
900
  "step": 146000
901
  },
902
  {
903
  "epoch": 0.15,
904
  "learning_rate": 9.744959644778422e-05,
905
- "loss": 0.6732,
906
  "step": 147000
907
  },
908
  {
909
  "epoch": 0.15,
910
  "learning_rate": 9.739720312887535e-05,
911
- "loss": 0.6777,
912
  "step": 148000
913
  },
914
  {
915
  "epoch": 0.15,
916
  "learning_rate": 9.734429148174675e-05,
917
- "loss": 1.0322,
918
  "step": 149000
919
  },
920
  {
921
  "epoch": 0.15,
922
  "learning_rate": 9.729086208503174e-05,
923
- "loss": 1.0166,
924
  "step": 150000
925
  },
926
  {
927
  "epoch": 0.15,
928
- "eval_loss": 0.79302978515625,
929
- "eval_runtime": 26.7643,
930
- "eval_samples_per_second": 186.816,
931
- "eval_steps_per_second": 1.495,
932
  "step": 150000
933
  }
934
  ],
 
638
  {
639
  "epoch": 0.1,
640
  "learning_rate": 9.923399386589933e-05,
641
+ "loss": 0.6525,
642
  "step": 103000
643
  },
644
  {
 
650
  {
651
  "epoch": 0.1,
652
  "learning_rate": 9.917525374361912e-05,
653
+ "loss": 1.0445,
654
  "step": 105000
655
  },
656
  {
657
  "epoch": 0.11,
658
  "learning_rate": 9.914507686137019e-05,
659
+ "loss": 0.9613,
660
  "step": 106000
661
  },
662
  {
663
  "epoch": 0.11,
664
  "learning_rate": 9.911436253643445e-05,
665
+ "loss": 0.9396,
666
  "step": 107000
667
  },
668
  {
669
  "epoch": 0.11,
670
  "learning_rate": 9.90831111046988e-05,
671
+ "loss": 1.0517,
672
  "step": 108000
673
  },
674
  {
675
  "epoch": 0.11,
676
  "learning_rate": 9.905132290792394e-05,
677
+ "loss": 1.0215,
678
  "step": 109000
679
  },
680
  {
681
  "epoch": 0.11,
682
  "learning_rate": 9.901899829374047e-05,
683
+ "loss": 1.0032,
684
  "step": 110000
685
  },
686
  {
687
  "epoch": 0.11,
688
  "learning_rate": 9.89861376156452e-05,
689
+ "loss": 1.0618,
690
  "step": 111000
691
  },
692
  {
693
  "epoch": 0.11,
694
  "learning_rate": 9.895274123299723e-05,
695
+ "loss": 1.0749,
696
  "step": 112000
697
  },
698
  {
699
  "epoch": 0.11,
700
  "learning_rate": 9.891880951101407e-05,
701
+ "loss": 1.0837,
702
  "step": 113000
703
  },
704
  {
705
  "epoch": 0.11,
706
  "learning_rate": 9.888434282076758e-05,
707
+ "loss": 0.9592,
708
  "step": 114000
709
  },
710
  {
711
  "epoch": 0.12,
712
  "learning_rate": 9.884934153917997e-05,
713
+ "loss": 1.0298,
714
  "step": 115000
715
  },
716
  {
717
  "epoch": 0.12,
718
  "learning_rate": 9.881380604901964e-05,
719
+ "loss": 1.0163,
720
  "step": 116000
721
  },
722
  {
723
  "epoch": 0.12,
724
  "learning_rate": 9.877773673889701e-05,
725
+ "loss": 0.8,
726
  "step": 117000
727
  },
728
  {
729
  "epoch": 0.12,
730
  "learning_rate": 9.87411340032603e-05,
731
+ "loss": 0.9133,
732
  "step": 118000
733
  },
734
  {
735
  "epoch": 0.12,
736
  "learning_rate": 9.870399824239117e-05,
737
+ "loss": 0.9643,
738
  "step": 119000
739
  },
740
  {
741
  "epoch": 0.12,
742
  "learning_rate": 9.86663298624003e-05,
743
+ "loss": 0.9112,
744
  "step": 120000
745
  },
746
  {
747
  "epoch": 0.12,
748
  "learning_rate": 9.862812927522309e-05,
749
+ "loss": 1.023,
750
  "step": 121000
751
  },
752
  {
753
  "epoch": 0.12,
754
  "learning_rate": 9.858939689861506e-05,
755
+ "loss": 0.9952,
756
  "step": 122000
757
  },
758
  {
759
  "epoch": 0.12,
760
  "learning_rate": 9.855013315614725e-05,
761
+ "loss": 0.984,
762
  "step": 123000
763
  },
764
  {
765
  "epoch": 0.12,
766
  "learning_rate": 9.851033847720166e-05,
767
+ "loss": 0.9928,
768
  "step": 124000
769
  },
770
  {
771
  "epoch": 0.12,
772
  "learning_rate": 9.847001329696653e-05,
773
+ "loss": 0.9131,
774
  "step": 125000
775
  },
776
  {
777
  "epoch": 0.13,
778
  "learning_rate": 9.842915805643155e-05,
779
+ "loss": 0.9251,
780
  "step": 126000
781
  },
782
  {
783
  "epoch": 0.13,
784
  "learning_rate": 9.838777320238312e-05,
785
+ "loss": 0.9687,
786
  "step": 127000
787
  },
788
  {
789
  "epoch": 0.13,
790
  "learning_rate": 9.834585918739936e-05,
791
+ "loss": 1.0081,
792
  "step": 128000
793
  },
794
  {
795
  "epoch": 0.13,
796
  "learning_rate": 9.830341646984521e-05,
797
+ "loss": 0.9352,
798
  "step": 129000
799
  },
800
  {
801
  "epoch": 0.13,
802
  "learning_rate": 9.826044551386744e-05,
803
+ "loss": 0.9909,
804
  "step": 130000
805
  },
806
  {
807
  "epoch": 0.13,
808
  "learning_rate": 9.821694678938953e-05,
809
+ "loss": 1.0289,
810
  "step": 131000
811
  },
812
  {
813
  "epoch": 0.13,
814
  "learning_rate": 9.817292077210659e-05,
815
+ "loss": 1.0766,
816
  "step": 132000
817
  },
818
  {
819
  "epoch": 0.13,
820
  "learning_rate": 9.812836794348004e-05,
821
+ "loss": 0.9957,
822
  "step": 133000
823
  },
824
  {
825
  "epoch": 0.13,
826
  "learning_rate": 9.808328879073251e-05,
827
+ "loss": 1.0035,
828
  "step": 134000
829
  },
830
  {
831
  "epoch": 0.14,
832
  "learning_rate": 9.803768380684242e-05,
833
+ "loss": 1.2201,
834
  "step": 135000
835
  },
836
  {
837
  "epoch": 0.14,
838
  "learning_rate": 9.799155349053851e-05,
839
+ "loss": 1.0479,
840
  "step": 136000
841
  },
842
  {
843
  "epoch": 0.14,
844
  "learning_rate": 9.794489834629455e-05,
845
+ "loss": 0.916,
846
  "step": 137000
847
  },
848
  {
849
  "epoch": 0.14,
850
  "learning_rate": 9.789771888432375e-05,
851
+ "loss": 0.9751,
852
  "step": 138000
853
  },
854
  {
855
  "epoch": 0.14,
856
  "learning_rate": 9.785001562057309e-05,
857
+ "loss": 0.9485,
858
  "step": 139000
859
  },
860
  {
861
  "epoch": 0.14,
862
  "learning_rate": 9.780178907671789e-05,
863
+ "loss": 0.9192,
864
  "step": 140000
865
  },
866
  {
867
  "epoch": 0.14,
868
  "learning_rate": 9.775303978015585e-05,
869
+ "loss": 0.9877,
870
  "step": 141000
871
  },
872
  {
873
  "epoch": 0.14,
874
  "learning_rate": 9.77037682640015e-05,
875
+ "loss": 0.989,
876
  "step": 142000
877
  },
878
  {
879
  "epoch": 0.14,
880
  "learning_rate": 9.765397506708023e-05,
881
+ "loss": 0.9245,
882
  "step": 143000
883
  },
884
  {
885
  "epoch": 0.14,
886
  "learning_rate": 9.760366073392246e-05,
887
+ "loss": 0.9223,
888
  "step": 144000
889
  },
890
  {
891
  "epoch": 0.14,
892
  "learning_rate": 9.755282581475769e-05,
893
+ "loss": 0.9474,
894
  "step": 145000
895
  },
896
  {
897
  "epoch": 0.15,
898
  "learning_rate": 9.750147086550844e-05,
899
+ "loss": 0.7895,
900
  "step": 146000
901
  },
902
  {
903
  "epoch": 0.15,
904
  "learning_rate": 9.744959644778422e-05,
905
+ "loss": 0.6734,
906
  "step": 147000
907
  },
908
  {
909
  "epoch": 0.15,
910
  "learning_rate": 9.739720312887535e-05,
911
+ "loss": 0.6778,
912
  "step": 148000
913
  },
914
  {
915
  "epoch": 0.15,
916
  "learning_rate": 9.734429148174675e-05,
917
+ "loss": 1.0325,
918
  "step": 149000
919
  },
920
  {
921
  "epoch": 0.15,
922
  "learning_rate": 9.729086208503174e-05,
923
+ "loss": 1.017,
924
  "step": 150000
925
  },
926
  {
927
  "epoch": 0.15,
928
+ "eval_loss": 0.791193425655365,
929
+ "eval_runtime": 27.8851,
930
+ "eval_samples_per_second": 179.307,
931
+ "eval_steps_per_second": 1.434,
932
  "step": 150000
933
  }
934
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb205c897cb7fffb6995f8ddb353379694c397676fdd449afebe019b835b4f8a
3
  size 3503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6ec36a3db40f8f1ddaac5209f15ca1b14671366820c356e3429fb92e12f3c8
3
  size 3503
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:258a403857724ef630fe48099f54db2ec14c7dae15424f2874409ee2181d7651
3
  size 442675755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bc0d6bce669ef3b9a37ef6ee5196e838e490074197c122478da0bc35a3e8702
3
  size 442675755
runs/Feb20_15-56-09_t1v-n-9f780742-w-0/events.out.tfevents.1676908745.t1v-n-9f780742-w-0.3298479.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a38d01f07a6ca880886fadf26a74c971f81e785cd62e1c3bf085dfc339d2e43
3
- size 12107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e06d4f36aa91c889360b5ae94a88b9ec9ece094ed9e243da1d8c15cdf4f251
3
+ size 13907
runs/Feb24_00-12-30_t1v-n-9f780742-w-0/1677197576.9197693/events.out.tfevents.1677197576.t1v-n-9f780742-w-0.3044920.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a5fbd6e886b0c4dc4c944fc5ef5df06817655069ed402a843d6e82683f505c9
3
+ size 5494
runs/Feb24_00-12-30_t1v-n-9f780742-w-0/events.out.tfevents.1677197576.t1v-n-9f780742-w-0.3044920.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4259b30c6b0189c21e1f311b8a8317502cfb9427519242ed75c126f270891573
3
+ size 4191
runs/Feb24_00-19-36_t1v-n-9f780742-w-0/1677197998.1740472/events.out.tfevents.1677197998.t1v-n-9f780742-w-0.3062414.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22336f6cbbbee2fac23968c04cc7ae7ee82056968dd22e2389cdd293772f96e8
3
+ size 5494
runs/Feb24_00-19-36_t1v-n-9f780742-w-0/events.out.tfevents.1677197998.t1v-n-9f780742-w-0.3062414.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9e768531b20105cac38e46eaa3f23b96f7e74bd097e6264fe8b8fa85c6b808
3
+ size 4191
runs/Feb24_02-46-37_t1v-n-9f780742-w-0/1677206822.561554/events.out.tfevents.1677206822.t1v-n-9f780742-w-0.3088188.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b08653912ee437c10feae38a9b02a6a7fefd406f0733910788a5789c5f275c6
3
+ size 5494
runs/Feb24_02-46-37_t1v-n-9f780742-w-0/events.out.tfevents.1677206822.t1v-n-9f780742-w-0.3088188.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517b84445e0e1a867254ad82f284b566d3eb43350bdf62570c192a250de1decd
3
+ size 4191
runs/Feb25_19-20-30_t1v-n-9f780742-w-0/1677352852.8462265/events.out.tfevents.1677352852.t1v-n-9f780742-w-0.3235622.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c186d49eb69d4077969e00cc7a10fdd645eacab5bf553597a602349c63d45d
3
+ size 5494
runs/Feb25_19-20-30_t1v-n-9f780742-w-0/events.out.tfevents.1677352852.t1v-n-9f780742-w-0.3235622.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d91c3fb78aff0688ef117263ed04672aaf11c076152935b41b6c1c0db64896
3
+ size 3831
runs/Feb27_19-46-56_t1v-n-9f780742-w-0/1677527238.5880394/events.out.tfevents.1677527238.t1v-n-9f780742-w-0.2687388.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61770893ad58894535db5b4cc776137aca01a3f871d3a5c3d812a443f7f3e655
3
+ size 5494
runs/Feb27_19-46-56_t1v-n-9f780742-w-0/events.out.tfevents.1677527238.t1v-n-9f780742-w-0.2687388.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53cdd5d73ce64e8fdcfd9d8faae04f0a4434a994fe919e51e7a8311600e68ab6
3
+ size 12107
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb205c897cb7fffb6995f8ddb353379694c397676fdd449afebe019b835b4f8a
3
  size 3503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6ec36a3db40f8f1ddaac5209f15ca1b14671366820c356e3429fb92e12f3c8
3
  size 3503