Helw150 committed on
Commit
257b73a
·
1 Parent(s): e9df8b7

Soft Detokenize

Browse files
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4ed5a94e6ce4595ee420c3f1381c856ab4862f3883765258457ae668502bccf
3
- size 4882944984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afdb67d2bc15ec119b768b0b7d0e95f5e60f700ba6f55bf3050cdfc43423a386
3
+ size 4882945232
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:147984343e0e125854978f66f01fa827cbff8df6e806ae08bdc9ad54a657a22b
3
- size 4857206960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79eacf70624c1cb871654f5070a73a16202d3d8ffd59aff8ae4bdef551813d2f
3
+ size 4857207288
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e46103ff754bd92facb6cf9ebc9e24a4007146613ebe0e976a8814e992e91af
3
- size 4857207008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08d7f3fd1ef9eb8c55cf8609d28bb1e56727cb919c85a7ec833574fe7db60baf
3
+ size 4857207336
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:037584ac2640f95afe4972971e92af4eebbd36786058743199ef94dadd5a0f6e
3
- size 4857207008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c565b9322a087f8be725428c55426133cde4dd1f6d76e354781fc18bac6bfb64
3
+ size 4857207336
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8876008ce6c89f563e07560899a6c74e9ad00d28793fc1c4b3e44050688f14f1
3
- size 4857207008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:777473c0710bf4125097f672dd9a001a24c0e18556c734ba498eb4a7f2e78164
3
+ size 4857207336
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d7f03748df733cbaf272a7d14e26527c04ff6e96f07a169085e0209bf03342b
3
- size 4992935216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ec15bc352ab3cf2f3e95c1c98045d65902ab1ae6a2a5926d5227da9dedbd91
3
+ size 4992514328
model-00007-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c053e5745a10eb2ec0a36840e037a06e3672a2c23a6598974a8733669b7d9144
3
- size 4150889888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5624f78bd79c51180e669196f65f190709879ee082c694f12491fac78eeb5eb0
3
+ size 4675599080
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 33455419392
4
  },
5
  "weight_map": {
6
  "connector.embed_positions.weight": "model-00007-of-00007.safetensors",
@@ -775,296 +775,297 @@
775
  "connector.layers.9.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
776
  "connector.layers.9.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
777
  "connector.layers.9.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
778
- "decoder.embed_tokens.weight": "model-00001-of-00007.safetensors",
779
- "decoder.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors",
780
- "decoder.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
781
- "decoder.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
782
- "decoder.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
783
- "decoder.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
784
- "decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
785
- "decoder.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
786
- "decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
787
- "decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
788
- "decoder.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors",
789
- "decoder.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
790
- "decoder.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
791
- "decoder.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
792
- "decoder.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
793
- "decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
794
- "decoder.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
795
- "decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
796
- "decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
797
- "decoder.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors",
798
- "decoder.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
799
- "decoder.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
800
- "decoder.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
801
- "decoder.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
802
- "decoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
803
- "decoder.layers.10.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
804
- "decoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
805
- "decoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
806
- "decoder.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors",
807
- "decoder.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
808
- "decoder.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
809
- "decoder.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
810
- "decoder.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
811
- "decoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
812
- "decoder.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
813
- "decoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
814
- "decoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
815
- "decoder.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors",
816
- "decoder.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
817
- "decoder.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
818
- "decoder.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
819
- "decoder.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
820
- "decoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
821
- "decoder.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
822
- "decoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
823
- "decoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
824
- "decoder.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors",
825
- "decoder.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
826
- "decoder.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
827
- "decoder.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
828
- "decoder.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
829
- "decoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
830
- "decoder.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
831
- "decoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
832
- "decoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
833
- "decoder.layers.14.input_layernorm.weight": "model-00003-of-00007.safetensors",
834
- "decoder.layers.14.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
835
- "decoder.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
836
- "decoder.layers.14.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
837
- "decoder.layers.14.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
838
- "decoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
839
- "decoder.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
840
- "decoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
841
- "decoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
842
- "decoder.layers.15.input_layernorm.weight": "model-00003-of-00007.safetensors",
843
- "decoder.layers.15.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
844
- "decoder.layers.15.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
845
- "decoder.layers.15.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
846
- "decoder.layers.15.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
847
- "decoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
848
- "decoder.layers.15.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
849
- "decoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
850
- "decoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
851
- "decoder.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors",
852
- "decoder.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
853
- "decoder.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
854
- "decoder.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
855
- "decoder.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
856
- "decoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
857
- "decoder.layers.16.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
858
- "decoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
859
- "decoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
860
- "decoder.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors",
861
- "decoder.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
862
- "decoder.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
863
- "decoder.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
864
- "decoder.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
865
- "decoder.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
866
- "decoder.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
867
- "decoder.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
868
- "decoder.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
869
- "decoder.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors",
870
- "decoder.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
871
- "decoder.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
872
- "decoder.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
873
- "decoder.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
874
- "decoder.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
875
- "decoder.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
876
- "decoder.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
877
- "decoder.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
878
- "decoder.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors",
879
- "decoder.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
880
- "decoder.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
881
- "decoder.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
882
- "decoder.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
883
- "decoder.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
884
- "decoder.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
885
- "decoder.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
886
- "decoder.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
887
- "decoder.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors",
888
- "decoder.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
889
- "decoder.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
890
- "decoder.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
891
- "decoder.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
892
- "decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
893
- "decoder.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
894
- "decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
895
- "decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
896
- "decoder.layers.20.input_layernorm.weight": "model-00004-of-00007.safetensors",
897
- "decoder.layers.20.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
898
- "decoder.layers.20.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
899
- "decoder.layers.20.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
900
- "decoder.layers.20.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
901
- "decoder.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
902
- "decoder.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
903
- "decoder.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
904
- "decoder.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
905
- "decoder.layers.21.input_layernorm.weight": "model-00004-of-00007.safetensors",
906
- "decoder.layers.21.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
907
- "decoder.layers.21.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
908
- "decoder.layers.21.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
909
- "decoder.layers.21.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
910
- "decoder.layers.21.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
911
- "decoder.layers.21.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
912
- "decoder.layers.21.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
913
- "decoder.layers.21.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
914
- "decoder.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors",
915
- "decoder.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
916
- "decoder.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
917
- "decoder.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
918
- "decoder.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
919
- "decoder.layers.22.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
920
- "decoder.layers.22.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
921
- "decoder.layers.22.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
922
- "decoder.layers.22.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
923
- "decoder.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors",
924
- "decoder.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
925
- "decoder.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
926
- "decoder.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
927
- "decoder.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
928
- "decoder.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
929
- "decoder.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
930
- "decoder.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
931
- "decoder.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
932
- "decoder.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors",
933
- "decoder.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
934
- "decoder.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
935
- "decoder.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
936
- "decoder.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
937
- "decoder.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
938
- "decoder.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
939
- "decoder.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
940
- "decoder.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
941
- "decoder.layers.25.input_layernorm.weight": "model-00005-of-00007.safetensors",
942
- "decoder.layers.25.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
943
- "decoder.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
944
- "decoder.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
945
- "decoder.layers.25.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
946
- "decoder.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
947
- "decoder.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
948
- "decoder.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
949
- "decoder.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
950
- "decoder.layers.26.input_layernorm.weight": "model-00005-of-00007.safetensors",
951
- "decoder.layers.26.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
952
- "decoder.layers.26.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
953
- "decoder.layers.26.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
954
- "decoder.layers.26.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
955
- "decoder.layers.26.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
956
- "decoder.layers.26.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
957
- "decoder.layers.26.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
958
- "decoder.layers.26.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
959
- "decoder.layers.27.input_layernorm.weight": "model-00005-of-00007.safetensors",
960
- "decoder.layers.27.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
961
- "decoder.layers.27.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
962
- "decoder.layers.27.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
963
- "decoder.layers.27.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
964
- "decoder.layers.27.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
965
- "decoder.layers.27.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
966
- "decoder.layers.27.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
967
- "decoder.layers.27.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
968
- "decoder.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors",
969
- "decoder.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
970
- "decoder.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
971
- "decoder.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
972
- "decoder.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
973
- "decoder.layers.28.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
974
- "decoder.layers.28.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
975
- "decoder.layers.28.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
976
- "decoder.layers.28.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
977
- "decoder.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors",
978
- "decoder.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
979
- "decoder.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
980
- "decoder.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
981
- "decoder.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
982
- "decoder.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
983
- "decoder.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
984
- "decoder.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
985
- "decoder.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
986
- "decoder.layers.3.input_layernorm.weight": "model-00001-of-00007.safetensors",
987
- "decoder.layers.3.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
988
- "decoder.layers.3.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
989
- "decoder.layers.3.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
990
- "decoder.layers.3.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
991
- "decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
992
- "decoder.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
993
- "decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
994
- "decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
995
- "decoder.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors",
996
- "decoder.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
997
- "decoder.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
998
- "decoder.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
999
- "decoder.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
1000
- "decoder.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1001
- "decoder.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
1002
- "decoder.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1003
- "decoder.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1004
- "decoder.layers.31.input_layernorm.weight": "model-00006-of-00007.safetensors",
1005
- "decoder.layers.31.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
1006
- "decoder.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
1007
- "decoder.layers.31.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
1008
- "decoder.layers.31.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
1009
- "decoder.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1010
- "decoder.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
1011
- "decoder.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1012
- "decoder.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1013
- "decoder.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
1014
- "decoder.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1015
- "decoder.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1016
- "decoder.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1017
- "decoder.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1018
- "decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
1019
- "decoder.layers.4.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
1020
- "decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
1021
- "decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
1022
- "decoder.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors",
1023
- "decoder.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1024
- "decoder.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1025
- "decoder.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1026
- "decoder.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1027
- "decoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1028
- "decoder.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1029
- "decoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1030
- "decoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1031
- "decoder.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors",
1032
- "decoder.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1033
- "decoder.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1034
- "decoder.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1035
- "decoder.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1036
- "decoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1037
- "decoder.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1038
- "decoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1039
- "decoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1040
- "decoder.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors",
1041
- "decoder.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1042
- "decoder.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1043
- "decoder.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1044
- "decoder.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1045
- "decoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1046
- "decoder.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1047
- "decoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1048
- "decoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1049
- "decoder.layers.8.input_layernorm.weight": "model-00002-of-00007.safetensors",
1050
- "decoder.layers.8.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1051
- "decoder.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1052
- "decoder.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1053
- "decoder.layers.8.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1054
- "decoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1055
- "decoder.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1056
- "decoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1057
- "decoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1058
- "decoder.layers.9.input_layernorm.weight": "model-00002-of-00007.safetensors",
1059
- "decoder.layers.9.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1060
- "decoder.layers.9.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1061
- "decoder.layers.9.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1062
- "decoder.layers.9.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1063
- "decoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1064
- "decoder.layers.9.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1065
- "decoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1066
- "decoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1067
- "decoder.norm.weight": "model-00006-of-00007.safetensors",
 
1068
  "encoder.conv1.bias": "model-00006-of-00007.safetensors",
1069
  "encoder.conv1.weight": "model-00006-of-00007.safetensors",
1070
  "encoder.conv2.bias": "model-00006-of-00007.safetensors",
@@ -1224,10 +1225,10 @@
1224
  "encoder.layers.17.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1225
  "encoder.layers.18.fc1.bias": "model-00006-of-00007.safetensors",
1226
  "encoder.layers.18.fc1.weight": "model-00006-of-00007.safetensors",
1227
- "encoder.layers.18.fc2.bias": "model-00006-of-00007.safetensors",
1228
- "encoder.layers.18.fc2.weight": "model-00006-of-00007.safetensors",
1229
- "encoder.layers.18.final_layer_norm.bias": "model-00006-of-00007.safetensors",
1230
- "encoder.layers.18.final_layer_norm.weight": "model-00006-of-00007.safetensors",
1231
  "encoder.layers.18.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1232
  "encoder.layers.18.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1233
  "encoder.layers.18.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
@@ -1237,21 +1238,21 @@
1237
  "encoder.layers.18.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1238
  "encoder.layers.18.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1239
  "encoder.layers.18.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1240
- "encoder.layers.19.fc1.bias": "model-00006-of-00007.safetensors",
1241
- "encoder.layers.19.fc1.weight": "model-00006-of-00007.safetensors",
1242
- "encoder.layers.19.fc2.bias": "model-00006-of-00007.safetensors",
1243
- "encoder.layers.19.fc2.weight": "model-00006-of-00007.safetensors",
1244
- "encoder.layers.19.final_layer_norm.bias": "model-00006-of-00007.safetensors",
1245
- "encoder.layers.19.final_layer_norm.weight": "model-00006-of-00007.safetensors",
1246
- "encoder.layers.19.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1247
- "encoder.layers.19.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1248
- "encoder.layers.19.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
1249
- "encoder.layers.19.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
1250
- "encoder.layers.19.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1251
- "encoder.layers.19.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
1252
- "encoder.layers.19.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1253
- "encoder.layers.19.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1254
- "encoder.layers.19.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1255
  "encoder.layers.2.fc1.bias": "model-00006-of-00007.safetensors",
1256
  "encoder.layers.2.fc1.weight": "model-00006-of-00007.safetensors",
1257
  "encoder.layers.2.fc2.bias": "model-00006-of-00007.safetensors",
@@ -1267,96 +1268,96 @@
1267
  "encoder.layers.2.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1268
  "encoder.layers.2.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1269
  "encoder.layers.2.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1270
- "encoder.layers.20.fc1.bias": "model-00006-of-00007.safetensors",
1271
- "encoder.layers.20.fc1.weight": "model-00006-of-00007.safetensors",
1272
- "encoder.layers.20.fc2.bias": "model-00006-of-00007.safetensors",
1273
- "encoder.layers.20.fc2.weight": "model-00006-of-00007.safetensors",
1274
- "encoder.layers.20.final_layer_norm.bias": "model-00006-of-00007.safetensors",
1275
- "encoder.layers.20.final_layer_norm.weight": "model-00006-of-00007.safetensors",
1276
- "encoder.layers.20.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1277
- "encoder.layers.20.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1278
- "encoder.layers.20.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
1279
- "encoder.layers.20.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
1280
- "encoder.layers.20.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1281
- "encoder.layers.20.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
1282
- "encoder.layers.20.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1283
- "encoder.layers.20.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1284
- "encoder.layers.20.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1285
- "encoder.layers.21.fc1.bias": "model-00006-of-00007.safetensors",
1286
- "encoder.layers.21.fc1.weight": "model-00006-of-00007.safetensors",
1287
- "encoder.layers.21.fc2.bias": "model-00006-of-00007.safetensors",
1288
- "encoder.layers.21.fc2.weight": "model-00006-of-00007.safetensors",
1289
- "encoder.layers.21.final_layer_norm.bias": "model-00006-of-00007.safetensors",
1290
- "encoder.layers.21.final_layer_norm.weight": "model-00006-of-00007.safetensors",
1291
- "encoder.layers.21.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1292
- "encoder.layers.21.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1293
- "encoder.layers.21.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
1294
- "encoder.layers.21.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
1295
- "encoder.layers.21.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1296
- "encoder.layers.21.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
1297
- "encoder.layers.21.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1298
- "encoder.layers.21.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1299
- "encoder.layers.21.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1300
- "encoder.layers.22.fc1.bias": "model-00006-of-00007.safetensors",
1301
- "encoder.layers.22.fc1.weight": "model-00006-of-00007.safetensors",
1302
- "encoder.layers.22.fc2.bias": "model-00006-of-00007.safetensors",
1303
- "encoder.layers.22.fc2.weight": "model-00006-of-00007.safetensors",
1304
- "encoder.layers.22.final_layer_norm.bias": "model-00006-of-00007.safetensors",
1305
- "encoder.layers.22.final_layer_norm.weight": "model-00006-of-00007.safetensors",
1306
- "encoder.layers.22.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1307
- "encoder.layers.22.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1308
- "encoder.layers.22.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
1309
- "encoder.layers.22.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
1310
- "encoder.layers.22.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1311
- "encoder.layers.22.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
1312
- "encoder.layers.22.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1313
- "encoder.layers.22.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1314
- "encoder.layers.22.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1315
- "encoder.layers.23.fc1.bias": "model-00006-of-00007.safetensors",
1316
- "encoder.layers.23.fc1.weight": "model-00006-of-00007.safetensors",
1317
- "encoder.layers.23.fc2.bias": "model-00006-of-00007.safetensors",
1318
- "encoder.layers.23.fc2.weight": "model-00006-of-00007.safetensors",
1319
- "encoder.layers.23.final_layer_norm.bias": "model-00006-of-00007.safetensors",
1320
- "encoder.layers.23.final_layer_norm.weight": "model-00006-of-00007.safetensors",
1321
- "encoder.layers.23.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1322
- "encoder.layers.23.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1323
- "encoder.layers.23.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
1324
- "encoder.layers.23.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
1325
- "encoder.layers.23.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1326
- "encoder.layers.23.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
1327
- "encoder.layers.23.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1328
- "encoder.layers.23.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1329
- "encoder.layers.23.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1330
- "encoder.layers.24.fc1.bias": "model-00006-of-00007.safetensors",
1331
- "encoder.layers.24.fc1.weight": "model-00006-of-00007.safetensors",
1332
- "encoder.layers.24.fc2.bias": "model-00006-of-00007.safetensors",
1333
- "encoder.layers.24.fc2.weight": "model-00006-of-00007.safetensors",
1334
- "encoder.layers.24.final_layer_norm.bias": "model-00006-of-00007.safetensors",
1335
- "encoder.layers.24.final_layer_norm.weight": "model-00006-of-00007.safetensors",
1336
- "encoder.layers.24.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1337
- "encoder.layers.24.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1338
- "encoder.layers.24.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
1339
- "encoder.layers.24.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
1340
- "encoder.layers.24.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1341
- "encoder.layers.24.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
1342
- "encoder.layers.24.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1343
- "encoder.layers.24.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1344
- "encoder.layers.24.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1345
  "encoder.layers.25.fc1.bias": "model-00007-of-00007.safetensors",
1346
  "encoder.layers.25.fc1.weight": "model-00007-of-00007.safetensors",
1347
  "encoder.layers.25.fc2.bias": "model-00007-of-00007.safetensors",
1348
  "encoder.layers.25.fc2.weight": "model-00007-of-00007.safetensors",
1349
  "encoder.layers.25.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1350
  "encoder.layers.25.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1351
- "encoder.layers.25.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1352
- "encoder.layers.25.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1353
- "encoder.layers.25.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
1354
- "encoder.layers.25.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
1355
- "encoder.layers.25.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1356
- "encoder.layers.25.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
1357
- "encoder.layers.25.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1358
- "encoder.layers.25.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1359
- "encoder.layers.25.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1360
  "encoder.layers.26.fc1.bias": "model-00007-of-00007.safetensors",
1361
  "encoder.layers.26.fc1.weight": "model-00007-of-00007.safetensors",
1362
  "encoder.layers.26.fc2.bias": "model-00007-of-00007.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 33979707392
4
  },
5
  "weight_map": {
6
  "connector.embed_positions.weight": "model-00007-of-00007.safetensors",
 
775
  "connector.layers.9.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
776
  "connector.layers.9.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
777
  "connector.layers.9.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
778
+ "decoder.lm_head.weight": "model-00006-of-00007.safetensors",
779
+ "decoder.model.embed_tokens.weight": "model-00001-of-00007.safetensors",
780
+ "decoder.model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors",
781
+ "decoder.model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
782
+ "decoder.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
783
+ "decoder.model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
784
+ "decoder.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
785
+ "decoder.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
786
+ "decoder.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
787
+ "decoder.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
788
+ "decoder.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
789
+ "decoder.model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors",
790
+ "decoder.model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
791
+ "decoder.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
792
+ "decoder.model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
793
+ "decoder.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
794
+ "decoder.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
795
+ "decoder.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
796
+ "decoder.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
797
+ "decoder.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
798
+ "decoder.model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors",
799
+ "decoder.model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
800
+ "decoder.model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
801
+ "decoder.model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
802
+ "decoder.model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
803
+ "decoder.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
804
+ "decoder.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
805
+ "decoder.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
806
+ "decoder.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
807
+ "decoder.model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors",
808
+ "decoder.model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
809
+ "decoder.model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
810
+ "decoder.model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
811
+ "decoder.model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
812
+ "decoder.model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
813
+ "decoder.model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
814
+ "decoder.model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
815
+ "decoder.model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
816
+ "decoder.model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors",
817
+ "decoder.model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
818
+ "decoder.model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
819
+ "decoder.model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
820
+ "decoder.model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
821
+ "decoder.model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
822
+ "decoder.model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
823
+ "decoder.model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
824
+ "decoder.model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
825
+ "decoder.model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors",
826
+ "decoder.model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
827
+ "decoder.model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
828
+ "decoder.model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
829
+ "decoder.model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
830
+ "decoder.model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
831
+ "decoder.model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
832
+ "decoder.model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
833
+ "decoder.model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
834
+ "decoder.model.layers.14.input_layernorm.weight": "model-00003-of-00007.safetensors",
835
+ "decoder.model.layers.14.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
836
+ "decoder.model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
837
+ "decoder.model.layers.14.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
838
+ "decoder.model.layers.14.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
839
+ "decoder.model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
840
+ "decoder.model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
841
+ "decoder.model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
842
+ "decoder.model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
843
+ "decoder.model.layers.15.input_layernorm.weight": "model-00003-of-00007.safetensors",
844
+ "decoder.model.layers.15.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
845
+ "decoder.model.layers.15.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
846
+ "decoder.model.layers.15.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
847
+ "decoder.model.layers.15.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
848
+ "decoder.model.layers.15.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
849
+ "decoder.model.layers.15.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
850
+ "decoder.model.layers.15.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
851
+ "decoder.model.layers.15.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
852
+ "decoder.model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors",
853
+ "decoder.model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
854
+ "decoder.model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
855
+ "decoder.model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
856
+ "decoder.model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
857
+ "decoder.model.layers.16.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
858
+ "decoder.model.layers.16.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
859
+ "decoder.model.layers.16.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
860
+ "decoder.model.layers.16.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
861
+ "decoder.model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors",
862
+ "decoder.model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
863
+ "decoder.model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
864
+ "decoder.model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
865
+ "decoder.model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
866
+ "decoder.model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
867
+ "decoder.model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
868
+ "decoder.model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
869
+ "decoder.model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
870
+ "decoder.model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors",
871
+ "decoder.model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
872
+ "decoder.model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
873
+ "decoder.model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
874
+ "decoder.model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
875
+ "decoder.model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
876
+ "decoder.model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
877
+ "decoder.model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
878
+ "decoder.model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
879
+ "decoder.model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors",
880
+ "decoder.model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
881
+ "decoder.model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
882
+ "decoder.model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
883
+ "decoder.model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
884
+ "decoder.model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
885
+ "decoder.model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
886
+ "decoder.model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
887
+ "decoder.model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
888
+ "decoder.model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors",
889
+ "decoder.model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
890
+ "decoder.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
891
+ "decoder.model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
892
+ "decoder.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
893
+ "decoder.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
894
+ "decoder.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
895
+ "decoder.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
896
+ "decoder.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
897
+ "decoder.model.layers.20.input_layernorm.weight": "model-00004-of-00007.safetensors",
898
+ "decoder.model.layers.20.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
899
+ "decoder.model.layers.20.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
900
+ "decoder.model.layers.20.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
901
+ "decoder.model.layers.20.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
902
+ "decoder.model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
903
+ "decoder.model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
904
+ "decoder.model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
905
+ "decoder.model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
906
+ "decoder.model.layers.21.input_layernorm.weight": "model-00004-of-00007.safetensors",
907
+ "decoder.model.layers.21.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
908
+ "decoder.model.layers.21.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
909
+ "decoder.model.layers.21.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
910
+ "decoder.model.layers.21.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
911
+ "decoder.model.layers.21.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
912
+ "decoder.model.layers.21.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
913
+ "decoder.model.layers.21.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
914
+ "decoder.model.layers.21.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
915
+ "decoder.model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors",
916
+ "decoder.model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
917
+ "decoder.model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
918
+ "decoder.model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
919
+ "decoder.model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
920
+ "decoder.model.layers.22.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
921
+ "decoder.model.layers.22.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
922
+ "decoder.model.layers.22.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
923
+ "decoder.model.layers.22.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
924
+ "decoder.model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors",
925
+ "decoder.model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
926
+ "decoder.model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
927
+ "decoder.model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
928
+ "decoder.model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
929
+ "decoder.model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
930
+ "decoder.model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
931
+ "decoder.model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
932
+ "decoder.model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
933
+ "decoder.model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors",
934
+ "decoder.model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
935
+ "decoder.model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
936
+ "decoder.model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
937
+ "decoder.model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
938
+ "decoder.model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
939
+ "decoder.model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
940
+ "decoder.model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
941
+ "decoder.model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
942
+ "decoder.model.layers.25.input_layernorm.weight": "model-00005-of-00007.safetensors",
943
+ "decoder.model.layers.25.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
944
+ "decoder.model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
945
+ "decoder.model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
946
+ "decoder.model.layers.25.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
947
+ "decoder.model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
948
+ "decoder.model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
949
+ "decoder.model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
950
+ "decoder.model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
951
+ "decoder.model.layers.26.input_layernorm.weight": "model-00005-of-00007.safetensors",
952
+ "decoder.model.layers.26.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
953
+ "decoder.model.layers.26.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
954
+ "decoder.model.layers.26.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
955
+ "decoder.model.layers.26.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
956
+ "decoder.model.layers.26.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
957
+ "decoder.model.layers.26.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
958
+ "decoder.model.layers.26.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
959
+ "decoder.model.layers.26.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
960
+ "decoder.model.layers.27.input_layernorm.weight": "model-00005-of-00007.safetensors",
961
+ "decoder.model.layers.27.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
962
+ "decoder.model.layers.27.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
963
+ "decoder.model.layers.27.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
964
+ "decoder.model.layers.27.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
965
+ "decoder.model.layers.27.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
966
+ "decoder.model.layers.27.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
967
+ "decoder.model.layers.27.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
968
+ "decoder.model.layers.27.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
969
+ "decoder.model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors",
970
+ "decoder.model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
971
+ "decoder.model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
972
+ "decoder.model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
973
+ "decoder.model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
974
+ "decoder.model.layers.28.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
975
+ "decoder.model.layers.28.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
976
+ "decoder.model.layers.28.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
977
+ "decoder.model.layers.28.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
978
+ "decoder.model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors",
979
+ "decoder.model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
980
+ "decoder.model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
981
+ "decoder.model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
982
+ "decoder.model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
983
+ "decoder.model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
984
+ "decoder.model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
985
+ "decoder.model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
986
+ "decoder.model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
987
+ "decoder.model.layers.3.input_layernorm.weight": "model-00001-of-00007.safetensors",
988
+ "decoder.model.layers.3.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
989
+ "decoder.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
990
+ "decoder.model.layers.3.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
991
+ "decoder.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
992
+ "decoder.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
993
+ "decoder.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
994
+ "decoder.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
995
+ "decoder.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
996
+ "decoder.model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors",
997
+ "decoder.model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
998
+ "decoder.model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
999
+ "decoder.model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
1000
+ "decoder.model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
1001
+ "decoder.model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1002
+ "decoder.model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
1003
+ "decoder.model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1004
+ "decoder.model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1005
+ "decoder.model.layers.31.input_layernorm.weight": "model-00006-of-00007.safetensors",
1006
+ "decoder.model.layers.31.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
1007
+ "decoder.model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
1008
+ "decoder.model.layers.31.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
1009
+ "decoder.model.layers.31.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
1010
+ "decoder.model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1011
+ "decoder.model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
1012
+ "decoder.model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
1013
+ "decoder.model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1014
+ "decoder.model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
1015
+ "decoder.model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1016
+ "decoder.model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1017
+ "decoder.model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1018
+ "decoder.model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1019
+ "decoder.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
1020
+ "decoder.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
1021
+ "decoder.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
1022
+ "decoder.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
1023
+ "decoder.model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors",
1024
+ "decoder.model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1025
+ "decoder.model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1026
+ "decoder.model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1027
+ "decoder.model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1028
+ "decoder.model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1029
+ "decoder.model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1030
+ "decoder.model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1031
+ "decoder.model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1032
+ "decoder.model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors",
1033
+ "decoder.model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1034
+ "decoder.model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1035
+ "decoder.model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1036
+ "decoder.model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1037
+ "decoder.model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1038
+ "decoder.model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1039
+ "decoder.model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1040
+ "decoder.model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1041
+ "decoder.model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors",
1042
+ "decoder.model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1043
+ "decoder.model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1044
+ "decoder.model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1045
+ "decoder.model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1046
+ "decoder.model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1047
+ "decoder.model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1048
+ "decoder.model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1049
+ "decoder.model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1050
+ "decoder.model.layers.8.input_layernorm.weight": "model-00002-of-00007.safetensors",
1051
+ "decoder.model.layers.8.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1052
+ "decoder.model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1053
+ "decoder.model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1054
+ "decoder.model.layers.8.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1055
+ "decoder.model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1056
+ "decoder.model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1057
+ "decoder.model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1058
+ "decoder.model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1059
+ "decoder.model.layers.9.input_layernorm.weight": "model-00002-of-00007.safetensors",
1060
+ "decoder.model.layers.9.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
1061
+ "decoder.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
1062
+ "decoder.model.layers.9.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
1063
+ "decoder.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
1064
+ "decoder.model.layers.9.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
1065
+ "decoder.model.layers.9.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
1066
+ "decoder.model.layers.9.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
1067
+ "decoder.model.layers.9.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
1068
+ "decoder.model.norm.weight": "model-00006-of-00007.safetensors",
1069
  "encoder.conv1.bias": "model-00006-of-00007.safetensors",
1070
  "encoder.conv1.weight": "model-00006-of-00007.safetensors",
1071
  "encoder.conv2.bias": "model-00006-of-00007.safetensors",
 
1225
  "encoder.layers.17.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1226
  "encoder.layers.18.fc1.bias": "model-00006-of-00007.safetensors",
1227
  "encoder.layers.18.fc1.weight": "model-00006-of-00007.safetensors",
1228
+ "encoder.layers.18.fc2.bias": "model-00007-of-00007.safetensors",
1229
+ "encoder.layers.18.fc2.weight": "model-00007-of-00007.safetensors",
1230
+ "encoder.layers.18.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1231
+ "encoder.layers.18.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1232
  "encoder.layers.18.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
1233
  "encoder.layers.18.self_attn.out_proj.bias": "model-00006-of-00007.safetensors",
1234
  "encoder.layers.18.self_attn.out_proj.weight": "model-00006-of-00007.safetensors",
 
1238
  "encoder.layers.18.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1239
  "encoder.layers.18.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1240
  "encoder.layers.18.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1241
+ "encoder.layers.19.fc1.bias": "model-00007-of-00007.safetensors",
1242
+ "encoder.layers.19.fc1.weight": "model-00007-of-00007.safetensors",
1243
+ "encoder.layers.19.fc2.bias": "model-00007-of-00007.safetensors",
1244
+ "encoder.layers.19.fc2.weight": "model-00007-of-00007.safetensors",
1245
+ "encoder.layers.19.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1246
+ "encoder.layers.19.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1247
+ "encoder.layers.19.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
1248
+ "encoder.layers.19.self_attn.out_proj.bias": "model-00007-of-00007.safetensors",
1249
+ "encoder.layers.19.self_attn.out_proj.weight": "model-00007-of-00007.safetensors",
1250
+ "encoder.layers.19.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
1251
+ "encoder.layers.19.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
1252
+ "encoder.layers.19.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
1253
+ "encoder.layers.19.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
1254
+ "encoder.layers.19.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
1255
+ "encoder.layers.19.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
1256
  "encoder.layers.2.fc1.bias": "model-00006-of-00007.safetensors",
1257
  "encoder.layers.2.fc1.weight": "model-00006-of-00007.safetensors",
1258
  "encoder.layers.2.fc2.bias": "model-00006-of-00007.safetensors",
 
1268
  "encoder.layers.2.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
1269
  "encoder.layers.2.self_attn_layer_norm.bias": "model-00006-of-00007.safetensors",
1270
  "encoder.layers.2.self_attn_layer_norm.weight": "model-00006-of-00007.safetensors",
1271
+ "encoder.layers.20.fc1.bias": "model-00007-of-00007.safetensors",
1272
+ "encoder.layers.20.fc1.weight": "model-00007-of-00007.safetensors",
1273
+ "encoder.layers.20.fc2.bias": "model-00007-of-00007.safetensors",
1274
+ "encoder.layers.20.fc2.weight": "model-00007-of-00007.safetensors",
1275
+ "encoder.layers.20.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1276
+ "encoder.layers.20.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1277
+ "encoder.layers.20.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
1278
+ "encoder.layers.20.self_attn.out_proj.bias": "model-00007-of-00007.safetensors",
1279
+ "encoder.layers.20.self_attn.out_proj.weight": "model-00007-of-00007.safetensors",
1280
+ "encoder.layers.20.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
1281
+ "encoder.layers.20.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
1282
+ "encoder.layers.20.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
1283
+ "encoder.layers.20.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
1284
+ "encoder.layers.20.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
1285
+ "encoder.layers.20.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
1286
+ "encoder.layers.21.fc1.bias": "model-00007-of-00007.safetensors",
1287
+ "encoder.layers.21.fc1.weight": "model-00007-of-00007.safetensors",
1288
+ "encoder.layers.21.fc2.bias": "model-00007-of-00007.safetensors",
1289
+ "encoder.layers.21.fc2.weight": "model-00007-of-00007.safetensors",
1290
+ "encoder.layers.21.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1291
+ "encoder.layers.21.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1292
+ "encoder.layers.21.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
1293
+ "encoder.layers.21.self_attn.out_proj.bias": "model-00007-of-00007.safetensors",
1294
+ "encoder.layers.21.self_attn.out_proj.weight": "model-00007-of-00007.safetensors",
1295
+ "encoder.layers.21.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
1296
+ "encoder.layers.21.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
1297
+ "encoder.layers.21.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
1298
+ "encoder.layers.21.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
1299
+ "encoder.layers.21.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
1300
+ "encoder.layers.21.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
1301
+ "encoder.layers.22.fc1.bias": "model-00007-of-00007.safetensors",
1302
+ "encoder.layers.22.fc1.weight": "model-00007-of-00007.safetensors",
1303
+ "encoder.layers.22.fc2.bias": "model-00007-of-00007.safetensors",
1304
+ "encoder.layers.22.fc2.weight": "model-00007-of-00007.safetensors",
1305
+ "encoder.layers.22.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1306
+ "encoder.layers.22.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1307
+ "encoder.layers.22.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
1308
+ "encoder.layers.22.self_attn.out_proj.bias": "model-00007-of-00007.safetensors",
1309
+ "encoder.layers.22.self_attn.out_proj.weight": "model-00007-of-00007.safetensors",
1310
+ "encoder.layers.22.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
1311
+ "encoder.layers.22.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
1312
+ "encoder.layers.22.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
1313
+ "encoder.layers.22.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
1314
+ "encoder.layers.22.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
1315
+ "encoder.layers.22.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
1316
+ "encoder.layers.23.fc1.bias": "model-00007-of-00007.safetensors",
1317
+ "encoder.layers.23.fc1.weight": "model-00007-of-00007.safetensors",
1318
+ "encoder.layers.23.fc2.bias": "model-00007-of-00007.safetensors",
1319
+ "encoder.layers.23.fc2.weight": "model-00007-of-00007.safetensors",
1320
+ "encoder.layers.23.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1321
+ "encoder.layers.23.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1322
+ "encoder.layers.23.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
1323
+ "encoder.layers.23.self_attn.out_proj.bias": "model-00007-of-00007.safetensors",
1324
+ "encoder.layers.23.self_attn.out_proj.weight": "model-00007-of-00007.safetensors",
1325
+ "encoder.layers.23.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
1326
+ "encoder.layers.23.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
1327
+ "encoder.layers.23.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
1328
+ "encoder.layers.23.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
1329
+ "encoder.layers.23.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
1330
+ "encoder.layers.23.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
1331
+ "encoder.layers.24.fc1.bias": "model-00007-of-00007.safetensors",
1332
+ "encoder.layers.24.fc1.weight": "model-00007-of-00007.safetensors",
1333
+ "encoder.layers.24.fc2.bias": "model-00007-of-00007.safetensors",
1334
+ "encoder.layers.24.fc2.weight": "model-00007-of-00007.safetensors",
1335
+ "encoder.layers.24.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1336
+ "encoder.layers.24.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1337
+ "encoder.layers.24.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
1338
+ "encoder.layers.24.self_attn.out_proj.bias": "model-00007-of-00007.safetensors",
1339
+ "encoder.layers.24.self_attn.out_proj.weight": "model-00007-of-00007.safetensors",
1340
+ "encoder.layers.24.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
1341
+ "encoder.layers.24.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
1342
+ "encoder.layers.24.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
1343
+ "encoder.layers.24.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
1344
+ "encoder.layers.24.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
1345
+ "encoder.layers.24.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
1346
  "encoder.layers.25.fc1.bias": "model-00007-of-00007.safetensors",
1347
  "encoder.layers.25.fc1.weight": "model-00007-of-00007.safetensors",
1348
  "encoder.layers.25.fc2.bias": "model-00007-of-00007.safetensors",
1349
  "encoder.layers.25.fc2.weight": "model-00007-of-00007.safetensors",
1350
  "encoder.layers.25.final_layer_norm.bias": "model-00007-of-00007.safetensors",
1351
  "encoder.layers.25.final_layer_norm.weight": "model-00007-of-00007.safetensors",
1352
+ "encoder.layers.25.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
1353
+ "encoder.layers.25.self_attn.out_proj.bias": "model-00007-of-00007.safetensors",
1354
+ "encoder.layers.25.self_attn.out_proj.weight": "model-00007-of-00007.safetensors",
1355
+ "encoder.layers.25.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
1356
+ "encoder.layers.25.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
1357
+ "encoder.layers.25.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
1358
+ "encoder.layers.25.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
1359
+ "encoder.layers.25.self_attn_layer_norm.bias": "model-00007-of-00007.safetensors",
1360
+ "encoder.layers.25.self_attn_layer_norm.weight": "model-00007-of-00007.safetensors",
1361
  "encoder.layers.26.fc1.bias": "model-00007-of-00007.safetensors",
1362
  "encoder.layers.26.fc1.weight": "model-00007-of-00007.safetensors",
1363
  "encoder.layers.26.fc2.bias": "model-00007-of-00007.safetensors",