Romain-XV commited on
Commit
2e81cf3
·
verified ·
1 Parent(s): dc80595

Training in progress, step 128, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0349df464b4639cd3b912ec1b2ac00bffd66c29cea313e57b29682cd54e5c7e3
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d601d43dade33badf98bf0461027583958dd8667ff02874ea062b250cd9db81
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5ecb111f4b4668d7e476fa38b96190cc9a21631b7cf5344f19938dd2c19747f
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c793b1cb9548fd561c88d3ad584df110322f62b86194ea29aeedbd50f780a3
3
  size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a990d0f6ec1e99435be5e64ba855d80d12af86cad358e24e7e8cc8741b51a9a7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0810b5b5d390c1667c8d6c6351c2b0f8ee6d357396bbdeb5cbca15c6b011b798
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c54d816848a62c27523c4718d987d5248a2b166cfdaf873e405061cb5285a5d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968a6dcbda34982ad43f2af2e04e5edf94e043c521201e71b1583695497d18e0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.24994200468063354,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.7839294463498285,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,202 @@
731
  "eval_samples_per_second": 7.183,
732
  "eval_steps_per_second": 1.804,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -754,12 +950,12 @@
754
  "should_evaluate": false,
755
  "should_log": false,
756
  "should_save": true,
757
- "should_training_stop": false
758
  },
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 5.627509890613248e+17,
763
  "train_batch_size": 4,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
  "best_metric": 0.24994200468063354,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 1.006859382655561,
5
  "eval_steps": 50,
6
+ "global_step": 128,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 7.183,
732
  "eval_steps_per_second": 1.804,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 0.7917687408133268,
737
+ "grad_norm": 4.1459736824035645,
738
+ "learning_rate": 2.4742923014386156e-05,
739
+ "loss": 4.0253,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 0.7996080352768251,
744
+ "grad_norm": 4.166021347045898,
745
+ "learning_rate": 2.301660165700936e-05,
746
+ "loss": 4.1193,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 0.8074473297403234,
751
+ "grad_norm": 4.5724639892578125,
752
+ "learning_rate": 2.1344844419735755e-05,
753
+ "loss": 4.1613,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 0.8152866242038217,
758
+ "grad_norm": 3.7771105766296387,
759
+ "learning_rate": 1.9728836206903656e-05,
760
+ "loss": 3.577,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 0.82312591866732,
765
+ "grad_norm": 4.427511215209961,
766
+ "learning_rate": 1.8169722409183097e-05,
767
+ "loss": 4.0308,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 0.8309652131308183,
772
+ "grad_norm": 4.253128528594971,
773
+ "learning_rate": 1.6668608091748495e-05,
774
+ "loss": 4.358,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 0.8388045075943165,
779
+ "grad_norm": 3.6124472618103027,
780
+ "learning_rate": 1.522655721103291e-05,
781
+ "loss": 3.8505,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 0.8466438020578148,
786
+ "grad_norm": 4.112203598022461,
787
+ "learning_rate": 1.3844591860619383e-05,
788
+ "loss": 3.9511,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 0.854483096521313,
793
+ "grad_norm": 3.7493703365325928,
794
+ "learning_rate": 1.2523691546803873e-05,
795
+ "loss": 3.8253,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 0.8623223909848113,
800
+ "grad_norm": 4.115346431732178,
801
+ "learning_rate": 1.1264792494342857e-05,
802
+ "loss": 3.8934,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 0.8701616854483096,
807
+ "grad_norm": 4.116607666015625,
808
+ "learning_rate": 1.0068786982878087e-05,
809
+ "loss": 3.7177,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 0.8780009799118079,
814
+ "grad_norm": 3.9183499813079834,
815
+ "learning_rate": 8.936522714508678e-06,
816
+ "loss": 3.8943,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 0.8858402743753062,
821
+ "grad_norm": 4.0384039878845215,
822
+ "learning_rate": 7.868802212958703e-06,
823
+ "loss": 3.4521,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 0.8936795688388045,
828
+ "grad_norm": 4.025205612182617,
829
+ "learning_rate": 6.866382254766157e-06,
830
+ "loss": 3.8196,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 0.9015188633023028,
835
+ "grad_norm": 4.01348352432251,
836
+ "learning_rate": 5.929973332896677e-06,
837
+ "loss": 3.9242,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 0.9093581577658011,
842
+ "grad_norm": 4.146557331085205,
843
+ "learning_rate": 5.060239153161872e-06,
844
+ "loss": 4.0043,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 0.9171974522292994,
849
+ "grad_norm": 4.394860744476318,
850
+ "learning_rate": 4.257796163799455e-06,
851
+ "loss": 3.8837,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 0.9250367466927977,
856
+ "grad_norm": 4.56512451171875,
857
+ "learning_rate": 3.5232131185484076e-06,
858
+ "loss": 4.3846,
859
+ "step": 118
860
+ },
861
+ {
862
+ "epoch": 0.932876041156296,
863
+ "grad_norm": 3.989962577819824,
864
+ "learning_rate": 2.857010673529015e-06,
865
+ "loss": 3.3769,
866
+ "step": 119
867
+ },
868
+ {
869
+ "epoch": 0.9407153356197943,
870
+ "grad_norm": 4.115790843963623,
871
+ "learning_rate": 2.259661018213333e-06,
872
+ "loss": 3.7523,
873
+ "step": 120
874
+ },
875
+ {
876
+ "epoch": 0.9485546300832925,
877
+ "grad_norm": 4.354365348815918,
878
+ "learning_rate": 1.7315875407479032e-06,
879
+ "loss": 3.7809,
880
+ "step": 121
881
+ },
882
+ {
883
+ "epoch": 0.9563939245467908,
884
+ "grad_norm": 4.128818035125732,
885
+ "learning_rate": 1.2731645278655445e-06,
886
+ "loss": 3.8053,
887
+ "step": 122
888
+ },
889
+ {
890
+ "epoch": 0.964233219010289,
891
+ "grad_norm": 4.223034858703613,
892
+ "learning_rate": 8.847168995992916e-07,
893
+ "loss": 3.5431,
894
+ "step": 123
895
+ },
896
+ {
897
+ "epoch": 0.9720725134737873,
898
+ "grad_norm": 4.52358341217041,
899
+ "learning_rate": 5.665199789862907e-07,
900
+ "loss": 4.4289,
901
+ "step": 124
902
+ },
903
+ {
904
+ "epoch": 0.9799118079372856,
905
+ "grad_norm": 3.970877170562744,
906
+ "learning_rate": 3.1879929692498757e-07,
907
+ "loss": 3.9163,
908
+ "step": 125
909
+ },
910
+ {
911
+ "epoch": 0.9877511024007839,
912
+ "grad_norm": 4.14064359664917,
913
+ "learning_rate": 1.4173043232380557e-07,
914
+ "loss": 3.9214,
915
+ "step": 126
916
+ },
917
+ {
918
+ "epoch": 0.9955903968642822,
919
+ "grad_norm": 3.948699712753296,
920
+ "learning_rate": 3.5438887654737355e-08,
921
+ "loss": 4.2029,
922
+ "step": 127
923
+ },
924
+ {
925
+ "epoch": 1.006859382655561,
926
+ "grad_norm": 3.799189567565918,
927
+ "learning_rate": 0.0,
928
+ "loss": 3.387,
929
+ "step": 128
930
  }
931
  ],
932
  "logging_steps": 1,
 
950
  "should_evaluate": false,
951
  "should_log": false,
952
  "should_save": true,
953
+ "should_training_stop": true
954
  },
955
  "attributes": {}
956
  }
957
  },
958
+ "total_flos": 7.203212659984957e+17,
959
  "train_batch_size": 4,
960
  "trial_name": null,
961
  "trial_params": null