joelniklaus commited on
Commit
ca54a29
1 Parent(s): 9476660

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85152592c35a317017d55fa354f34c246356212b9dcab1b41fd2d3c9c62bc15c
3
  size 2693742553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90896b5081c841f5c896cd27201edec3a834ddf5ff97721a4988999ef95d051f
3
  size 2693742553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:211a166e31201e3f8c624e735065a9e1e51a2bee91e3092fd30f622e42f9f94b
3
  size 1346893675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72210ddc5a03c29df4e488fe110d112c8b5dcd865f434eebeec4c5357bf30f3e
3
  size 1346893675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de27aa21af9de6af1c4692b306e8986e3451209a6a7976f0ac9cc6e0d1c4942
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524c23c847bcf10bc48c85a9acbf3b41aee0f0c95369d379f5dffc1ef101a72e
3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04ad030a150fa0d8eb5e5920300951e9645dc85319159f9dd4f177aff4c5b722
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:647800033a1fe4aa55a6bc8c002ddb2326a52950bde89b878eccf8a697eacefa
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.00035,
5
- "global_step": 150000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -618,323 +618,15 @@
618
  {
619
  "epoch": 1.03,
620
  "eval_loss": 0.4750686585903168,
621
- "eval_runtime": 31.6698,
622
- "eval_samples_per_second": 157.879,
623
- "eval_steps_per_second": 2.494,
624
  "step": 100000
625
- },
626
- {
627
- "epoch": 1.03,
628
- "learning_rate": 9.929058033379181e-05,
629
- "loss": 0.5951,
630
- "step": 101000
631
- },
632
- {
633
- "epoch": 1.03,
634
- "learning_rate": 9.926255646355804e-05,
635
- "loss": 0.6244,
636
- "step": 102000
637
- },
638
- {
639
- "epoch": 1.03,
640
- "learning_rate": 9.923399386589933e-05,
641
- "loss": 0.7808,
642
- "step": 103000
643
- },
644
- {
645
- "epoch": 1.03,
646
- "learning_rate": 9.92048928531717e-05,
647
- "loss": 0.6963,
648
- "step": 104000
649
- },
650
- {
651
- "epoch": 1.03,
652
- "learning_rate": 9.917525374361912e-05,
653
- "loss": 0.6241,
654
- "step": 105000
655
- },
656
- {
657
- "epoch": 1.03,
658
- "learning_rate": 9.914507686137019e-05,
659
- "loss": 0.6501,
660
- "step": 106000
661
- },
662
- {
663
- "epoch": 1.03,
664
- "learning_rate": 9.911436253643445e-05,
665
- "loss": 0.7491,
666
- "step": 107000
667
- },
668
- {
669
- "epoch": 1.03,
670
- "learning_rate": 9.90831111046988e-05,
671
- "loss": 0.68,
672
- "step": 108000
673
- },
674
- {
675
- "epoch": 1.03,
676
- "learning_rate": 9.905132290792394e-05,
677
- "loss": 0.6164,
678
- "step": 109000
679
- },
680
- {
681
- "epoch": 1.04,
682
- "learning_rate": 9.901899829374047e-05,
683
- "loss": 0.6354,
684
- "step": 110000
685
- },
686
- {
687
- "epoch": 1.04,
688
- "learning_rate": 9.89861376156452e-05,
689
- "loss": 0.7301,
690
- "step": 111000
691
- },
692
- {
693
- "epoch": 1.04,
694
- "learning_rate": 9.895274123299723e-05,
695
- "loss": 0.658,
696
- "step": 112000
697
- },
698
- {
699
- "epoch": 1.04,
700
- "learning_rate": 9.891880951101407e-05,
701
- "loss": 0.5986,
702
- "step": 113000
703
- },
704
- {
705
- "epoch": 1.04,
706
- "learning_rate": 9.888434282076758e-05,
707
- "loss": 0.6309,
708
- "step": 114000
709
- },
710
- {
711
- "epoch": 1.04,
712
- "learning_rate": 9.884934153917997e-05,
713
- "loss": 0.7246,
714
- "step": 115000
715
- },
716
- {
717
- "epoch": 1.04,
718
- "learning_rate": 9.881380604901964e-05,
719
- "loss": 0.6311,
720
- "step": 116000
721
- },
722
- {
723
- "epoch": 1.04,
724
- "learning_rate": 9.877773673889701e-05,
725
- "loss": 0.5719,
726
- "step": 117000
727
- },
728
- {
729
- "epoch": 1.04,
730
- "learning_rate": 9.87411340032603e-05,
731
- "loss": 0.6915,
732
- "step": 118000
733
- },
734
- {
735
- "epoch": 1.04,
736
- "learning_rate": 9.870399824239117e-05,
737
- "loss": 0.7171,
738
- "step": 119000
739
- },
740
- {
741
- "epoch": 1.05,
742
- "learning_rate": 9.86663298624003e-05,
743
- "loss": 0.6416,
744
- "step": 120000
745
- },
746
- {
747
- "epoch": 1.05,
748
- "learning_rate": 9.862812927522309e-05,
749
- "loss": 0.5733,
750
- "step": 121000
751
- },
752
- {
753
- "epoch": 1.05,
754
- "learning_rate": 9.858939689861506e-05,
755
- "loss": 0.6067,
756
- "step": 122000
757
- },
758
- {
759
- "epoch": 1.05,
760
- "learning_rate": 9.855013315614725e-05,
761
- "loss": 0.7015,
762
- "step": 123000
763
- },
764
- {
765
- "epoch": 1.05,
766
- "learning_rate": 9.851033847720166e-05,
767
- "loss": 0.631,
768
- "step": 124000
769
- },
770
- {
771
- "epoch": 1.05,
772
- "learning_rate": 9.847001329696653e-05,
773
- "loss": 0.5199,
774
- "step": 125000
775
- },
776
- {
777
- "epoch": 1.05,
778
- "learning_rate": 9.842915805643155e-05,
779
- "loss": 0.5407,
780
- "step": 126000
781
- },
782
- {
783
- "epoch": 1.05,
784
- "learning_rate": 9.838777320238312e-05,
785
- "loss": 0.6483,
786
- "step": 127000
787
- },
788
- {
789
- "epoch": 1.05,
790
- "learning_rate": 9.834585918739936e-05,
791
- "loss": 0.5515,
792
- "step": 128000
793
- },
794
- {
795
- "epoch": 1.05,
796
- "learning_rate": 9.830341646984521e-05,
797
- "loss": 0.476,
798
- "step": 129000
799
- },
800
- {
801
- "epoch": 1.06,
802
- "learning_rate": 9.826044551386744e-05,
803
- "loss": 0.5087,
804
- "step": 130000
805
- },
806
- {
807
- "epoch": 1.06,
808
- "learning_rate": 9.821694678938953e-05,
809
- "loss": 0.6735,
810
- "step": 131000
811
- },
812
- {
813
- "epoch": 1.06,
814
- "learning_rate": 9.817292077210659e-05,
815
- "loss": 0.5828,
816
- "step": 132000
817
- },
818
- {
819
- "epoch": 1.06,
820
- "learning_rate": 9.812836794348004e-05,
821
- "loss": 0.5029,
822
- "step": 133000
823
- },
824
- {
825
- "epoch": 1.06,
826
- "learning_rate": 9.808328879073251e-05,
827
- "loss": 0.538,
828
- "step": 134000
829
- },
830
- {
831
- "epoch": 1.06,
832
- "learning_rate": 9.803768380684242e-05,
833
- "loss": 0.6243,
834
- "step": 135000
835
- },
836
- {
837
- "epoch": 1.06,
838
- "learning_rate": 9.799155349053851e-05,
839
- "loss": 0.559,
840
- "step": 136000
841
- },
842
- {
843
- "epoch": 1.06,
844
- "learning_rate": 9.794489834629455e-05,
845
- "loss": 0.5442,
846
- "step": 137000
847
- },
848
- {
849
- "epoch": 1.06,
850
- "learning_rate": 9.789771888432375e-05,
851
- "loss": 0.6303,
852
- "step": 138000
853
- },
854
- {
855
- "epoch": 1.06,
856
- "learning_rate": 9.785001562057309e-05,
857
- "loss": 0.7137,
858
- "step": 139000
859
- },
860
- {
861
- "epoch": 1.07,
862
- "learning_rate": 9.780178907671789e-05,
863
- "loss": 0.6225,
864
- "step": 140000
865
- },
866
- {
867
- "epoch": 1.07,
868
- "learning_rate": 9.775303978015585e-05,
869
- "loss": 0.5664,
870
- "step": 141000
871
- },
872
- {
873
- "epoch": 1.07,
874
- "learning_rate": 9.77037682640015e-05,
875
- "loss": 0.612,
876
- "step": 142000
877
- },
878
- {
879
- "epoch": 1.07,
880
- "learning_rate": 9.765397506708023e-05,
881
- "loss": 0.6986,
882
- "step": 143000
883
- },
884
- {
885
- "epoch": 1.07,
886
- "learning_rate": 9.760366073392246e-05,
887
- "loss": 0.6242,
888
- "step": 144000
889
- },
890
- {
891
- "epoch": 1.07,
892
- "learning_rate": 9.755282581475769e-05,
893
- "loss": 0.5591,
894
- "step": 145000
895
- },
896
- {
897
- "epoch": 1.07,
898
- "learning_rate": 9.750147086550844e-05,
899
- "loss": 0.607,
900
- "step": 146000
901
- },
902
- {
903
- "epoch": 1.07,
904
- "learning_rate": 9.744959644778422e-05,
905
- "loss": 0.6867,
906
- "step": 147000
907
- },
908
- {
909
- "epoch": 1.07,
910
- "learning_rate": 9.739720312887535e-05,
911
- "loss": 0.6058,
912
- "step": 148000
913
- },
914
- {
915
- "epoch": 1.07,
916
- "learning_rate": 9.734429148174675e-05,
917
- "loss": 0.5436,
918
- "step": 149000
919
- },
920
- {
921
- "epoch": 2.0,
922
- "learning_rate": 9.729086208503174e-05,
923
- "loss": 0.6304,
924
- "step": 150000
925
- },
926
- {
927
- "epoch": 2.0,
928
- "eval_loss": 0.421994686126709,
929
- "eval_runtime": 19.4466,
930
- "eval_samples_per_second": 257.114,
931
- "eval_steps_per_second": 4.062,
932
- "step": 150000
933
  }
934
  ],
935
  "max_steps": 1000000,
936
  "num_train_epochs": 9223372036854775807,
937
- "total_flos": 8.947529484790137e+18,
938
  "trial_name": null,
939
  "trial_params": null
940
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.025175,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
618
  {
619
  "epoch": 1.03,
620
  "eval_loss": 0.4750686585903168,
621
+ "eval_runtime": 32.1999,
622
+ "eval_samples_per_second": 155.28,
623
+ "eval_steps_per_second": 2.453,
624
  "step": 100000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  }
626
  ],
627
  "max_steps": 1000000,
628
  "num_train_epochs": 9223372036854775807,
629
+ "total_flos": 5.965012200289468e+18,
630
  "trial_name": null,
631
  "trial_params": null
632
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2fdc390cc059f4ea67f02d60dce87ec4a8bb54eedf9e688c28d4d67ce830c58
3
  size 3503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d436a85c1e8005a725fc38ac48546a83c0b2088af49c3e1ad07ce2c60e7abadd
3
  size 3503
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:211a166e31201e3f8c624e735065a9e1e51a2bee91e3092fd30f622e42f9f94b
3
  size 1346893675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72210ddc5a03c29df4e488fe110d112c8b5dcd865f434eebeec4c5357bf30f3e
3
  size 1346893675
runs/Feb17_16-44-14_t1v-n-91d26b69-w-0/events.out.tfevents.1676652795.t1v-n-91d26b69-w-0.1590282.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d74077e22d15f354e66a7ab30a887876b9e6e117b079fa7f385d049d48231232
3
- size 20406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf2b973a560dd069539c0ffb87c7060a60e20340064c91a371045f3c24711209
3
+ size 25566
runs/Feb25_19-23-37_t1v-n-91d26b69-w-0/1677353056.0324807/events.out.tfevents.1677353056.t1v-n-91d26b69-w-0.3629124.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c1d53db010811aeb98c7545768e4a4a39b4f259c129069f9e2aaf2ad0d47f34
3
+ size 5514
runs/Feb25_19-23-37_t1v-n-91d26b69-w-0/events.out.tfevents.1677353056.t1v-n-91d26b69-w-0.3629124.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbbb8416973162fa401e46fa4fa4173a62fc112808de22ca92c90b4d040c9203
3
+ size 12130
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2fdc390cc059f4ea67f02d60dce87ec4a8bb54eedf9e688c28d4d67ce830c58
3
  size 3503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d436a85c1e8005a725fc38ac48546a83c0b2088af49c3e1ad07ce2c60e7abadd
3
  size 3503