Upload InternVLForConditionalGeneration

Files changed:
- config.json +1 -0
- model-00001-of-00016.safetensors +2 -2
- model-00002-of-00016.safetensors +2 -2
- model-00003-of-00016.safetensors +2 -2
- model.safetensors.index.json +1 -46
config.json CHANGED
@@ -37,6 +37,7 @@
     "use_sliding_window": false,
     "vocab_size": 151674
   },
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.52.0.dev0",
   "vision_config": {
     "architectures": [
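The added `torch_dtype` entry records the precision the checkpoint was serialized in, so loaders can default to bfloat16 without being told. A minimal sketch of picking that up at load time, assuming a recent transformers build and a placeholder repo id:

```python
# Minimal sketch: let from_pretrained honor the checkpoint's recorded dtype.
# The repo id is a placeholder for illustration, not taken from this commit.
import torch
from transformers import InternVLForConditionalGeneration

model = InternVLForConditionalGeneration.from_pretrained(
    "OpenGVLab/InternVL3-hf",  # hypothetical repo id
    torch_dtype="auto",        # reads "torch_dtype": "bfloat16" from config.json
)
assert model.dtype == torch.bfloat16
```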
model-00001-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c15466cb27d9e0f4b8cbe985e743cc30a648694ed904c965565f17ec82b14045
+size 4988563328
model-00002-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e7840a85f648d99c34a38a7a0f94e093e6cb4fdc62901be146a5518000560ab2
+size 4937273312
model-00003-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:90a83f8196f754901bb00a50a29deb2279cfe49db8a0d601468d1cf5bc18994a
+size 4960223800
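The three shard files are Git LFS pointers: a version line, the SHA-256 of the payload, and its size in bytes; this commit fills in the real digests and sizes. A minimal sketch for checking a downloaded shard against its pointer, assuming the file sits in the working directory:

```python
# Minimal sketch: verify a downloaded shard against its LFS pointer values.
# Digest and size come from this commit; the local path is an assumption.
import hashlib
import os

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB shards need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

path = "model-00001-of-00016.safetensors"
assert os.path.getsize(path) == 4988563328
assert sha256_of(path) == "c15466cb27d9e0f4b8cbe985e743cc30a648694ed904c965565f17ec82b14045"
```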
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 76776329984
   },
   "weight_map": {
     "language_model.lm_head.weight": "model-00016-of-00016.safetensors",
@@ -784,7 +784,6 @@
     "vision_tower.embeddings.patch_embeddings.projection.bias": "model-00001-of-00016.safetensors",
     "vision_tower.embeddings.patch_embeddings.projection.weight": "model-00001-of-00016.safetensors",
     "vision_tower.embeddings.position_embeddings": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.0.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -800,7 +799,6 @@
     "vision_tower.encoder.layer.0.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.0.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.1.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -816,7 +814,6 @@
     "vision_tower.encoder.layer.1.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.1.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.10.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -832,7 +829,6 @@
     "vision_tower.encoder.layer.10.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.10.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.11.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -848,7 +844,6 @@
     "vision_tower.encoder.layer.11.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.11.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.12.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -864,7 +859,6 @@
     "vision_tower.encoder.layer.12.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.12.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.13.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -880,7 +874,6 @@
     "vision_tower.encoder.layer.13.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.13.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.14.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -896,7 +889,6 @@
     "vision_tower.encoder.layer.14.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.14.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.15.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -912,7 +904,6 @@
     "vision_tower.encoder.layer.15.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.15.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.16.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -928,7 +919,6 @@
     "vision_tower.encoder.layer.16.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.16.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.17.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -944,7 +934,6 @@
     "vision_tower.encoder.layer.17.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.17.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.18.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -960,7 +949,6 @@
     "vision_tower.encoder.layer.18.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.18.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.19.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -976,7 +964,6 @@
     "vision_tower.encoder.layer.19.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.19.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.2.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -992,7 +979,6 @@
     "vision_tower.encoder.layer.2.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.2.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.20.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.20.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1008,7 +994,6 @@
     "vision_tower.encoder.layer.20.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.20.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.21.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1024,7 +1009,6 @@
     "vision_tower.encoder.layer.21.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.21.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.22.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1040,7 +1024,6 @@
     "vision_tower.encoder.layer.22.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.22.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.23.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1056,7 +1039,6 @@
     "vision_tower.encoder.layer.23.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.23.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.24.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1072,7 +1054,6 @@
     "vision_tower.encoder.layer.24.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.24.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.25.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1088,7 +1069,6 @@
     "vision_tower.encoder.layer.25.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.25.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.26.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1104,7 +1084,6 @@
     "vision_tower.encoder.layer.26.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.26.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.27.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1120,7 +1099,6 @@
     "vision_tower.encoder.layer.27.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.27.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.28.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1136,7 +1114,6 @@
     "vision_tower.encoder.layer.28.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.28.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.29.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1152,7 +1129,6 @@
     "vision_tower.encoder.layer.29.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.29.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.3.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1168,7 +1144,6 @@
     "vision_tower.encoder.layer.3.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.3.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.30.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1184,7 +1159,6 @@
     "vision_tower.encoder.layer.30.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.30.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.31.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1200,7 +1174,6 @@
     "vision_tower.encoder.layer.31.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.31.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.32.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1216,7 +1189,6 @@
     "vision_tower.encoder.layer.32.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.32.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.33.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1232,7 +1204,6 @@
     "vision_tower.encoder.layer.33.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.33.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.34.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1248,7 +1219,6 @@
     "vision_tower.encoder.layer.34.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.34.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.35.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1264,7 +1234,6 @@
     "vision_tower.encoder.layer.35.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.35.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.36.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1280,7 +1249,6 @@
     "vision_tower.encoder.layer.36.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.36.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.37.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1296,7 +1264,6 @@
     "vision_tower.encoder.layer.37.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.37.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.38.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1312,7 +1279,6 @@
     "vision_tower.encoder.layer.38.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.38.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.39.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1328,7 +1294,6 @@
     "vision_tower.encoder.layer.39.mlp.fc1.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.mlp.fc2.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.39.mlp.fc2.weight": "model-00002-of-00016.safetensors",
-    "vision_tower.encoder.layer.4.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1344,7 +1309,6 @@
     "vision_tower.encoder.layer.4.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.4.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.40.attention.k_norm.bias": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.40.attention.k_norm.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.40.attention.k_proj.weight": "model-00002-of-00016.safetensors",
     "vision_tower.encoder.layer.40.attention.projection_layer.bias": "model-00002-of-00016.safetensors",
@@ -1360,7 +1324,6 @@
     "vision_tower.encoder.layer.40.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.40.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.40.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.41.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1376,7 +1339,6 @@
     "vision_tower.encoder.layer.41.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.41.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.42.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1392,7 +1354,6 @@
     "vision_tower.encoder.layer.42.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.42.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.43.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1408,7 +1369,6 @@
     "vision_tower.encoder.layer.43.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.43.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.44.attention.k_norm.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.attention.k_norm.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.attention.k_proj.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.attention.projection_layer.bias": "model-00003-of-00016.safetensors",
@@ -1424,7 +1384,6 @@
     "vision_tower.encoder.layer.44.mlp.fc1.weight": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.mlp.fc2.bias": "model-00003-of-00016.safetensors",
     "vision_tower.encoder.layer.44.mlp.fc2.weight": "model-00003-of-00016.safetensors",
-    "vision_tower.encoder.layer.5.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1440,7 +1399,6 @@
     "vision_tower.encoder.layer.5.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.5.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.6.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1456,7 +1414,6 @@
     "vision_tower.encoder.layer.6.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.6.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.7.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1472,7 +1429,6 @@
     "vision_tower.encoder.layer.7.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.7.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.8.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
@@ -1488,7 +1444,6 @@
     "vision_tower.encoder.layer.8.mlp.fc1.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.mlp.fc2.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.8.mlp.fc2.weight": "model-00001-of-00016.safetensors",
-    "vision_tower.encoder.layer.9.attention.k_norm.bias": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.9.attention.k_norm.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.9.attention.k_proj.weight": "model-00001-of-00016.safetensors",
     "vision_tower.encoder.layer.9.attention.projection_layer.bias": "model-00001-of-00016.safetensors",
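The index maps every tensor name to the shard that stores it; this commit records `total_size` and drops the `*.attention.k_norm.bias` keys throughout the vision tower (the k_norm there apparently no longer carries a bias term). A minimal sketch of how the map is consulted, assuming the index file sits in the working directory:

```python
# Minimal sketch: resolve a tensor to its shard via weight_map.
# Keys and values below come from this commit's diff; the path is an assumption.
import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])  # 76776329984 after this commit

key = "vision_tower.encoder.layer.0.attention.k_norm.weight"
print(index["weight_map"][key])         # model-00001-of-00016.safetensors

# The k_norm.bias entries were removed from the map entirely:
assert "vision_tower.encoder.layer.0.attention.k_norm.bias" not in index["weight_map"]
```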