OpenSourceRonin
commited on
Upload model Llama-2-7b-hf-v4-k4096-0-woft
Browse files- config.json +224 -224
- generation_config.json +0 -1
- model.safetensors +2 -2
config.json
CHANGED
@@ -39,7 +39,7 @@
|
|
39 |
"outlier_size": 0,
|
40 |
"vector_lens": [
|
41 |
-1,
|
42 |
-
|
43 |
]
|
44 |
},
|
45 |
"model.layers.0.mlp.gate_proj": {
|
@@ -63,7 +63,7 @@
|
|
63 |
"outlier_size": 0,
|
64 |
"vector_lens": [
|
65 |
-1,
|
66 |
-
|
67 |
]
|
68 |
},
|
69 |
"model.layers.0.mlp.up_proj": {
|
@@ -87,7 +87,7 @@
|
|
87 |
"outlier_size": 0,
|
88 |
"vector_lens": [
|
89 |
-1,
|
90 |
-
|
91 |
]
|
92 |
},
|
93 |
"model.layers.0.self_attn.k_proj": {
|
@@ -111,7 +111,7 @@
|
|
111 |
"outlier_size": 0,
|
112 |
"vector_lens": [
|
113 |
-1,
|
114 |
-
|
115 |
]
|
116 |
},
|
117 |
"model.layers.0.self_attn.o_proj": {
|
@@ -135,7 +135,7 @@
|
|
135 |
"outlier_size": 0,
|
136 |
"vector_lens": [
|
137 |
-1,
|
138 |
-
|
139 |
]
|
140 |
},
|
141 |
"model.layers.0.self_attn.q_proj": {
|
@@ -159,7 +159,7 @@
|
|
159 |
"outlier_size": 0,
|
160 |
"vector_lens": [
|
161 |
-1,
|
162 |
-
|
163 |
]
|
164 |
},
|
165 |
"model.layers.0.self_attn.v_proj": {
|
@@ -183,7 +183,7 @@
|
|
183 |
"outlier_size": 0,
|
184 |
"vector_lens": [
|
185 |
-1,
|
186 |
-
|
187 |
]
|
188 |
},
|
189 |
"model.layers.1.mlp.down_proj": {
|
@@ -207,7 +207,7 @@
|
|
207 |
"outlier_size": 0,
|
208 |
"vector_lens": [
|
209 |
-1,
|
210 |
-
|
211 |
]
|
212 |
},
|
213 |
"model.layers.1.mlp.gate_proj": {
|
@@ -231,7 +231,7 @@
|
|
231 |
"outlier_size": 0,
|
232 |
"vector_lens": [
|
233 |
-1,
|
234 |
-
|
235 |
]
|
236 |
},
|
237 |
"model.layers.1.mlp.up_proj": {
|
@@ -255,7 +255,7 @@
|
|
255 |
"outlier_size": 0,
|
256 |
"vector_lens": [
|
257 |
-1,
|
258 |
-
|
259 |
]
|
260 |
},
|
261 |
"model.layers.1.self_attn.k_proj": {
|
@@ -279,7 +279,7 @@
|
|
279 |
"outlier_size": 0,
|
280 |
"vector_lens": [
|
281 |
-1,
|
282 |
-
|
283 |
]
|
284 |
},
|
285 |
"model.layers.1.self_attn.o_proj": {
|
@@ -303,7 +303,7 @@
|
|
303 |
"outlier_size": 0,
|
304 |
"vector_lens": [
|
305 |
-1,
|
306 |
-
|
307 |
]
|
308 |
},
|
309 |
"model.layers.1.self_attn.q_proj": {
|
@@ -327,7 +327,7 @@
|
|
327 |
"outlier_size": 0,
|
328 |
"vector_lens": [
|
329 |
-1,
|
330 |
-
|
331 |
]
|
332 |
},
|
333 |
"model.layers.1.self_attn.v_proj": {
|
@@ -351,7 +351,7 @@
|
|
351 |
"outlier_size": 0,
|
352 |
"vector_lens": [
|
353 |
-1,
|
354 |
-
|
355 |
]
|
356 |
},
|
357 |
"model.layers.10.mlp.down_proj": {
|
@@ -375,7 +375,7 @@
|
|
375 |
"outlier_size": 0,
|
376 |
"vector_lens": [
|
377 |
-1,
|
378 |
-
|
379 |
]
|
380 |
},
|
381 |
"model.layers.10.mlp.gate_proj": {
|
@@ -399,7 +399,7 @@
|
|
399 |
"outlier_size": 0,
|
400 |
"vector_lens": [
|
401 |
-1,
|
402 |
-
|
403 |
]
|
404 |
},
|
405 |
"model.layers.10.mlp.up_proj": {
|
@@ -423,7 +423,7 @@
|
|
423 |
"outlier_size": 0,
|
424 |
"vector_lens": [
|
425 |
-1,
|
426 |
-
|
427 |
]
|
428 |
},
|
429 |
"model.layers.10.self_attn.k_proj": {
|
@@ -447,7 +447,7 @@
|
|
447 |
"outlier_size": 0,
|
448 |
"vector_lens": [
|
449 |
-1,
|
450 |
-
|
451 |
]
|
452 |
},
|
453 |
"model.layers.10.self_attn.o_proj": {
|
@@ -471,7 +471,7 @@
|
|
471 |
"outlier_size": 0,
|
472 |
"vector_lens": [
|
473 |
-1,
|
474 |
-
|
475 |
]
|
476 |
},
|
477 |
"model.layers.10.self_attn.q_proj": {
|
@@ -495,7 +495,7 @@
|
|
495 |
"outlier_size": 0,
|
496 |
"vector_lens": [
|
497 |
-1,
|
498 |
-
|
499 |
]
|
500 |
},
|
501 |
"model.layers.10.self_attn.v_proj": {
|
@@ -519,7 +519,7 @@
|
|
519 |
"outlier_size": 0,
|
520 |
"vector_lens": [
|
521 |
-1,
|
522 |
-
|
523 |
]
|
524 |
},
|
525 |
"model.layers.11.mlp.down_proj": {
|
@@ -543,7 +543,7 @@
|
|
543 |
"outlier_size": 0,
|
544 |
"vector_lens": [
|
545 |
-1,
|
546 |
-
|
547 |
]
|
548 |
},
|
549 |
"model.layers.11.mlp.gate_proj": {
|
@@ -567,7 +567,7 @@
|
|
567 |
"outlier_size": 0,
|
568 |
"vector_lens": [
|
569 |
-1,
|
570 |
-
|
571 |
]
|
572 |
},
|
573 |
"model.layers.11.mlp.up_proj": {
|
@@ -591,7 +591,7 @@
|
|
591 |
"outlier_size": 0,
|
592 |
"vector_lens": [
|
593 |
-1,
|
594 |
-
|
595 |
]
|
596 |
},
|
597 |
"model.layers.11.self_attn.k_proj": {
|
@@ -615,7 +615,7 @@
|
|
615 |
"outlier_size": 0,
|
616 |
"vector_lens": [
|
617 |
-1,
|
618 |
-
|
619 |
]
|
620 |
},
|
621 |
"model.layers.11.self_attn.o_proj": {
|
@@ -639,7 +639,7 @@
|
|
639 |
"outlier_size": 0,
|
640 |
"vector_lens": [
|
641 |
-1,
|
642 |
-
|
643 |
]
|
644 |
},
|
645 |
"model.layers.11.self_attn.q_proj": {
|
@@ -663,7 +663,7 @@
|
|
663 |
"outlier_size": 0,
|
664 |
"vector_lens": [
|
665 |
-1,
|
666 |
-
|
667 |
]
|
668 |
},
|
669 |
"model.layers.11.self_attn.v_proj": {
|
@@ -687,7 +687,7 @@
|
|
687 |
"outlier_size": 0,
|
688 |
"vector_lens": [
|
689 |
-1,
|
690 |
-
|
691 |
]
|
692 |
},
|
693 |
"model.layers.12.mlp.down_proj": {
|
@@ -711,7 +711,7 @@
|
|
711 |
"outlier_size": 0,
|
712 |
"vector_lens": [
|
713 |
-1,
|
714 |
-
|
715 |
]
|
716 |
},
|
717 |
"model.layers.12.mlp.gate_proj": {
|
@@ -735,7 +735,7 @@
|
|
735 |
"outlier_size": 0,
|
736 |
"vector_lens": [
|
737 |
-1,
|
738 |
-
|
739 |
]
|
740 |
},
|
741 |
"model.layers.12.mlp.up_proj": {
|
@@ -759,7 +759,7 @@
|
|
759 |
"outlier_size": 0,
|
760 |
"vector_lens": [
|
761 |
-1,
|
762 |
-
|
763 |
]
|
764 |
},
|
765 |
"model.layers.12.self_attn.k_proj": {
|
@@ -783,7 +783,7 @@
|
|
783 |
"outlier_size": 0,
|
784 |
"vector_lens": [
|
785 |
-1,
|
786 |
-
|
787 |
]
|
788 |
},
|
789 |
"model.layers.12.self_attn.o_proj": {
|
@@ -807,7 +807,7 @@
|
|
807 |
"outlier_size": 0,
|
808 |
"vector_lens": [
|
809 |
-1,
|
810 |
-
|
811 |
]
|
812 |
},
|
813 |
"model.layers.12.self_attn.q_proj": {
|
@@ -831,7 +831,7 @@
|
|
831 |
"outlier_size": 0,
|
832 |
"vector_lens": [
|
833 |
-1,
|
834 |
-
|
835 |
]
|
836 |
},
|
837 |
"model.layers.12.self_attn.v_proj": {
|
@@ -855,7 +855,7 @@
|
|
855 |
"outlier_size": 0,
|
856 |
"vector_lens": [
|
857 |
-1,
|
858 |
-
|
859 |
]
|
860 |
},
|
861 |
"model.layers.13.mlp.down_proj": {
|
@@ -879,7 +879,7 @@
|
|
879 |
"outlier_size": 0,
|
880 |
"vector_lens": [
|
881 |
-1,
|
882 |
-
|
883 |
]
|
884 |
},
|
885 |
"model.layers.13.mlp.gate_proj": {
|
@@ -903,7 +903,7 @@
|
|
903 |
"outlier_size": 0,
|
904 |
"vector_lens": [
|
905 |
-1,
|
906 |
-
|
907 |
]
|
908 |
},
|
909 |
"model.layers.13.mlp.up_proj": {
|
@@ -927,7 +927,7 @@
|
|
927 |
"outlier_size": 0,
|
928 |
"vector_lens": [
|
929 |
-1,
|
930 |
-
|
931 |
]
|
932 |
},
|
933 |
"model.layers.13.self_attn.k_proj": {
|
@@ -951,7 +951,7 @@
|
|
951 |
"outlier_size": 0,
|
952 |
"vector_lens": [
|
953 |
-1,
|
954 |
-
|
955 |
]
|
956 |
},
|
957 |
"model.layers.13.self_attn.o_proj": {
|
@@ -975,7 +975,7 @@
|
|
975 |
"outlier_size": 0,
|
976 |
"vector_lens": [
|
977 |
-1,
|
978 |
-
|
979 |
]
|
980 |
},
|
981 |
"model.layers.13.self_attn.q_proj": {
|
@@ -999,7 +999,7 @@
|
|
999 |
"outlier_size": 0,
|
1000 |
"vector_lens": [
|
1001 |
-1,
|
1002 |
-
|
1003 |
]
|
1004 |
},
|
1005 |
"model.layers.13.self_attn.v_proj": {
|
@@ -1023,7 +1023,7 @@
|
|
1023 |
"outlier_size": 0,
|
1024 |
"vector_lens": [
|
1025 |
-1,
|
1026 |
-
|
1027 |
]
|
1028 |
},
|
1029 |
"model.layers.14.mlp.down_proj": {
|
@@ -1047,7 +1047,7 @@
|
|
1047 |
"outlier_size": 0,
|
1048 |
"vector_lens": [
|
1049 |
-1,
|
1050 |
-
|
1051 |
]
|
1052 |
},
|
1053 |
"model.layers.14.mlp.gate_proj": {
|
@@ -1071,7 +1071,7 @@
|
|
1071 |
"outlier_size": 0,
|
1072 |
"vector_lens": [
|
1073 |
-1,
|
1074 |
-
|
1075 |
]
|
1076 |
},
|
1077 |
"model.layers.14.mlp.up_proj": {
|
@@ -1095,7 +1095,7 @@
|
|
1095 |
"outlier_size": 0,
|
1096 |
"vector_lens": [
|
1097 |
-1,
|
1098 |
-
|
1099 |
]
|
1100 |
},
|
1101 |
"model.layers.14.self_attn.k_proj": {
|
@@ -1119,7 +1119,7 @@
|
|
1119 |
"outlier_size": 0,
|
1120 |
"vector_lens": [
|
1121 |
-1,
|
1122 |
-
|
1123 |
]
|
1124 |
},
|
1125 |
"model.layers.14.self_attn.o_proj": {
|
@@ -1143,7 +1143,7 @@
|
|
1143 |
"outlier_size": 0,
|
1144 |
"vector_lens": [
|
1145 |
-1,
|
1146 |
-
|
1147 |
]
|
1148 |
},
|
1149 |
"model.layers.14.self_attn.q_proj": {
|
@@ -1167,7 +1167,7 @@
|
|
1167 |
"outlier_size": 0,
|
1168 |
"vector_lens": [
|
1169 |
-1,
|
1170 |
-
|
1171 |
]
|
1172 |
},
|
1173 |
"model.layers.14.self_attn.v_proj": {
|
@@ -1191,7 +1191,7 @@
|
|
1191 |
"outlier_size": 0,
|
1192 |
"vector_lens": [
|
1193 |
-1,
|
1194 |
-
|
1195 |
]
|
1196 |
},
|
1197 |
"model.layers.15.mlp.down_proj": {
|
@@ -1215,7 +1215,7 @@
|
|
1215 |
"outlier_size": 0,
|
1216 |
"vector_lens": [
|
1217 |
-1,
|
1218 |
-
|
1219 |
]
|
1220 |
},
|
1221 |
"model.layers.15.mlp.gate_proj": {
|
@@ -1239,7 +1239,7 @@
|
|
1239 |
"outlier_size": 0,
|
1240 |
"vector_lens": [
|
1241 |
-1,
|
1242 |
-
|
1243 |
]
|
1244 |
},
|
1245 |
"model.layers.15.mlp.up_proj": {
|
@@ -1263,7 +1263,7 @@
|
|
1263 |
"outlier_size": 0,
|
1264 |
"vector_lens": [
|
1265 |
-1,
|
1266 |
-
|
1267 |
]
|
1268 |
},
|
1269 |
"model.layers.15.self_attn.k_proj": {
|
@@ -1287,7 +1287,7 @@
|
|
1287 |
"outlier_size": 0,
|
1288 |
"vector_lens": [
|
1289 |
-1,
|
1290 |
-
|
1291 |
]
|
1292 |
},
|
1293 |
"model.layers.15.self_attn.o_proj": {
|
@@ -1311,7 +1311,7 @@
|
|
1311 |
"outlier_size": 0,
|
1312 |
"vector_lens": [
|
1313 |
-1,
|
1314 |
-
|
1315 |
]
|
1316 |
},
|
1317 |
"model.layers.15.self_attn.q_proj": {
|
@@ -1335,7 +1335,7 @@
|
|
1335 |
"outlier_size": 0,
|
1336 |
"vector_lens": [
|
1337 |
-1,
|
1338 |
-
|
1339 |
]
|
1340 |
},
|
1341 |
"model.layers.15.self_attn.v_proj": {
|
@@ -1359,7 +1359,7 @@
|
|
1359 |
"outlier_size": 0,
|
1360 |
"vector_lens": [
|
1361 |
-1,
|
1362 |
-
|
1363 |
]
|
1364 |
},
|
1365 |
"model.layers.16.mlp.down_proj": {
|
@@ -1383,7 +1383,7 @@
|
|
1383 |
"outlier_size": 0,
|
1384 |
"vector_lens": [
|
1385 |
-1,
|
1386 |
-
|
1387 |
]
|
1388 |
},
|
1389 |
"model.layers.16.mlp.gate_proj": {
|
@@ -1407,7 +1407,7 @@
|
|
1407 |
"outlier_size": 0,
|
1408 |
"vector_lens": [
|
1409 |
-1,
|
1410 |
-
|
1411 |
]
|
1412 |
},
|
1413 |
"model.layers.16.mlp.up_proj": {
|
@@ -1431,7 +1431,7 @@
|
|
1431 |
"outlier_size": 0,
|
1432 |
"vector_lens": [
|
1433 |
-1,
|
1434 |
-
|
1435 |
]
|
1436 |
},
|
1437 |
"model.layers.16.self_attn.k_proj": {
|
@@ -1455,7 +1455,7 @@
|
|
1455 |
"outlier_size": 0,
|
1456 |
"vector_lens": [
|
1457 |
-1,
|
1458 |
-
|
1459 |
]
|
1460 |
},
|
1461 |
"model.layers.16.self_attn.o_proj": {
|
@@ -1479,7 +1479,7 @@
|
|
1479 |
"outlier_size": 0,
|
1480 |
"vector_lens": [
|
1481 |
-1,
|
1482 |
-
|
1483 |
]
|
1484 |
},
|
1485 |
"model.layers.16.self_attn.q_proj": {
|
@@ -1503,7 +1503,7 @@
|
|
1503 |
"outlier_size": 0,
|
1504 |
"vector_lens": [
|
1505 |
-1,
|
1506 |
-
|
1507 |
]
|
1508 |
},
|
1509 |
"model.layers.16.self_attn.v_proj": {
|
@@ -1527,7 +1527,7 @@
|
|
1527 |
"outlier_size": 0,
|
1528 |
"vector_lens": [
|
1529 |
-1,
|
1530 |
-
|
1531 |
]
|
1532 |
},
|
1533 |
"model.layers.17.mlp.down_proj": {
|
@@ -1551,7 +1551,7 @@
|
|
1551 |
"outlier_size": 0,
|
1552 |
"vector_lens": [
|
1553 |
-1,
|
1554 |
-
|
1555 |
]
|
1556 |
},
|
1557 |
"model.layers.17.mlp.gate_proj": {
|
@@ -1575,7 +1575,7 @@
|
|
1575 |
"outlier_size": 0,
|
1576 |
"vector_lens": [
|
1577 |
-1,
|
1578 |
-
|
1579 |
]
|
1580 |
},
|
1581 |
"model.layers.17.mlp.up_proj": {
|
@@ -1599,7 +1599,7 @@
|
|
1599 |
"outlier_size": 0,
|
1600 |
"vector_lens": [
|
1601 |
-1,
|
1602 |
-
|
1603 |
]
|
1604 |
},
|
1605 |
"model.layers.17.self_attn.k_proj": {
|
@@ -1623,7 +1623,7 @@
|
|
1623 |
"outlier_size": 0,
|
1624 |
"vector_lens": [
|
1625 |
-1,
|
1626 |
-
|
1627 |
]
|
1628 |
},
|
1629 |
"model.layers.17.self_attn.o_proj": {
|
@@ -1647,7 +1647,7 @@
|
|
1647 |
"outlier_size": 0,
|
1648 |
"vector_lens": [
|
1649 |
-1,
|
1650 |
-
|
1651 |
]
|
1652 |
},
|
1653 |
"model.layers.17.self_attn.q_proj": {
|
@@ -1671,7 +1671,7 @@
|
|
1671 |
"outlier_size": 0,
|
1672 |
"vector_lens": [
|
1673 |
-1,
|
1674 |
-
|
1675 |
]
|
1676 |
},
|
1677 |
"model.layers.17.self_attn.v_proj": {
|
@@ -1695,7 +1695,7 @@
|
|
1695 |
"outlier_size": 0,
|
1696 |
"vector_lens": [
|
1697 |
-1,
|
1698 |
-
|
1699 |
]
|
1700 |
},
|
1701 |
"model.layers.18.mlp.down_proj": {
|
@@ -1719,7 +1719,7 @@
|
|
1719 |
"outlier_size": 0,
|
1720 |
"vector_lens": [
|
1721 |
-1,
|
1722 |
-
|
1723 |
]
|
1724 |
},
|
1725 |
"model.layers.18.mlp.gate_proj": {
|
@@ -1743,7 +1743,7 @@
|
|
1743 |
"outlier_size": 0,
|
1744 |
"vector_lens": [
|
1745 |
-1,
|
1746 |
-
|
1747 |
]
|
1748 |
},
|
1749 |
"model.layers.18.mlp.up_proj": {
|
@@ -1767,7 +1767,7 @@
|
|
1767 |
"outlier_size": 0,
|
1768 |
"vector_lens": [
|
1769 |
-1,
|
1770 |
-
|
1771 |
]
|
1772 |
},
|
1773 |
"model.layers.18.self_attn.k_proj": {
|
@@ -1791,7 +1791,7 @@
|
|
1791 |
"outlier_size": 0,
|
1792 |
"vector_lens": [
|
1793 |
-1,
|
1794 |
-
|
1795 |
]
|
1796 |
},
|
1797 |
"model.layers.18.self_attn.o_proj": {
|
@@ -1815,7 +1815,7 @@
|
|
1815 |
"outlier_size": 0,
|
1816 |
"vector_lens": [
|
1817 |
-1,
|
1818 |
-
|
1819 |
]
|
1820 |
},
|
1821 |
"model.layers.18.self_attn.q_proj": {
|
@@ -1839,7 +1839,7 @@
|
|
1839 |
"outlier_size": 0,
|
1840 |
"vector_lens": [
|
1841 |
-1,
|
1842 |
-
|
1843 |
]
|
1844 |
},
|
1845 |
"model.layers.18.self_attn.v_proj": {
|
@@ -1863,7 +1863,7 @@
|
|
1863 |
"outlier_size": 0,
|
1864 |
"vector_lens": [
|
1865 |
-1,
|
1866 |
-
|
1867 |
]
|
1868 |
},
|
1869 |
"model.layers.19.mlp.down_proj": {
|
@@ -1887,7 +1887,7 @@
|
|
1887 |
"outlier_size": 0,
|
1888 |
"vector_lens": [
|
1889 |
-1,
|
1890 |
-
|
1891 |
]
|
1892 |
},
|
1893 |
"model.layers.19.mlp.gate_proj": {
|
@@ -1911,7 +1911,7 @@
|
|
1911 |
"outlier_size": 0,
|
1912 |
"vector_lens": [
|
1913 |
-1,
|
1914 |
-
|
1915 |
]
|
1916 |
},
|
1917 |
"model.layers.19.mlp.up_proj": {
|
@@ -1935,7 +1935,7 @@
|
|
1935 |
"outlier_size": 0,
|
1936 |
"vector_lens": [
|
1937 |
-1,
|
1938 |
-
|
1939 |
]
|
1940 |
},
|
1941 |
"model.layers.19.self_attn.k_proj": {
|
@@ -1959,7 +1959,7 @@
|
|
1959 |
"outlier_size": 0,
|
1960 |
"vector_lens": [
|
1961 |
-1,
|
1962 |
-
|
1963 |
]
|
1964 |
},
|
1965 |
"model.layers.19.self_attn.o_proj": {
|
@@ -1983,7 +1983,7 @@
|
|
1983 |
"outlier_size": 0,
|
1984 |
"vector_lens": [
|
1985 |
-1,
|
1986 |
-
|
1987 |
]
|
1988 |
},
|
1989 |
"model.layers.19.self_attn.q_proj": {
|
@@ -2007,7 +2007,7 @@
|
|
2007 |
"outlier_size": 0,
|
2008 |
"vector_lens": [
|
2009 |
-1,
|
2010 |
-
|
2011 |
]
|
2012 |
},
|
2013 |
"model.layers.19.self_attn.v_proj": {
|
@@ -2031,7 +2031,7 @@
|
|
2031 |
"outlier_size": 0,
|
2032 |
"vector_lens": [
|
2033 |
-1,
|
2034 |
-
|
2035 |
]
|
2036 |
},
|
2037 |
"model.layers.2.mlp.down_proj": {
|
@@ -2055,7 +2055,7 @@
|
|
2055 |
"outlier_size": 0,
|
2056 |
"vector_lens": [
|
2057 |
-1,
|
2058 |
-
|
2059 |
]
|
2060 |
},
|
2061 |
"model.layers.2.mlp.gate_proj": {
|
@@ -2079,7 +2079,7 @@
|
|
2079 |
"outlier_size": 0,
|
2080 |
"vector_lens": [
|
2081 |
-1,
|
2082 |
-
|
2083 |
]
|
2084 |
},
|
2085 |
"model.layers.2.mlp.up_proj": {
|
@@ -2103,7 +2103,7 @@
|
|
2103 |
"outlier_size": 0,
|
2104 |
"vector_lens": [
|
2105 |
-1,
|
2106 |
-
|
2107 |
]
|
2108 |
},
|
2109 |
"model.layers.2.self_attn.k_proj": {
|
@@ -2127,7 +2127,7 @@
|
|
2127 |
"outlier_size": 0,
|
2128 |
"vector_lens": [
|
2129 |
-1,
|
2130 |
-
|
2131 |
]
|
2132 |
},
|
2133 |
"model.layers.2.self_attn.o_proj": {
|
@@ -2151,7 +2151,7 @@
|
|
2151 |
"outlier_size": 0,
|
2152 |
"vector_lens": [
|
2153 |
-1,
|
2154 |
-
|
2155 |
]
|
2156 |
},
|
2157 |
"model.layers.2.self_attn.q_proj": {
|
@@ -2175,7 +2175,7 @@
|
|
2175 |
"outlier_size": 0,
|
2176 |
"vector_lens": [
|
2177 |
-1,
|
2178 |
-
|
2179 |
]
|
2180 |
},
|
2181 |
"model.layers.2.self_attn.v_proj": {
|
@@ -2199,7 +2199,7 @@
|
|
2199 |
"outlier_size": 0,
|
2200 |
"vector_lens": [
|
2201 |
-1,
|
2202 |
-
|
2203 |
]
|
2204 |
},
|
2205 |
"model.layers.20.mlp.down_proj": {
|
@@ -2223,7 +2223,7 @@
|
|
2223 |
"outlier_size": 0,
|
2224 |
"vector_lens": [
|
2225 |
-1,
|
2226 |
-
|
2227 |
]
|
2228 |
},
|
2229 |
"model.layers.20.mlp.gate_proj": {
|
@@ -2247,7 +2247,7 @@
|
|
2247 |
"outlier_size": 0,
|
2248 |
"vector_lens": [
|
2249 |
-1,
|
2250 |
-
|
2251 |
]
|
2252 |
},
|
2253 |
"model.layers.20.mlp.up_proj": {
|
@@ -2271,7 +2271,7 @@
|
|
2271 |
"outlier_size": 0,
|
2272 |
"vector_lens": [
|
2273 |
-1,
|
2274 |
-
|
2275 |
]
|
2276 |
},
|
2277 |
"model.layers.20.self_attn.k_proj": {
|
@@ -2295,7 +2295,7 @@
|
|
2295 |
"outlier_size": 0,
|
2296 |
"vector_lens": [
|
2297 |
-1,
|
2298 |
-
|
2299 |
]
|
2300 |
},
|
2301 |
"model.layers.20.self_attn.o_proj": {
|
@@ -2319,7 +2319,7 @@
|
|
2319 |
"outlier_size": 0,
|
2320 |
"vector_lens": [
|
2321 |
-1,
|
2322 |
-
|
2323 |
]
|
2324 |
},
|
2325 |
"model.layers.20.self_attn.q_proj": {
|
@@ -2343,7 +2343,7 @@
|
|
2343 |
"outlier_size": 0,
|
2344 |
"vector_lens": [
|
2345 |
-1,
|
2346 |
-
|
2347 |
]
|
2348 |
},
|
2349 |
"model.layers.20.self_attn.v_proj": {
|
@@ -2367,7 +2367,7 @@
|
|
2367 |
"outlier_size": 0,
|
2368 |
"vector_lens": [
|
2369 |
-1,
|
2370 |
-
|
2371 |
]
|
2372 |
},
|
2373 |
"model.layers.21.mlp.down_proj": {
|
@@ -2391,7 +2391,7 @@
|
|
2391 |
"outlier_size": 0,
|
2392 |
"vector_lens": [
|
2393 |
-1,
|
2394 |
-
|
2395 |
]
|
2396 |
},
|
2397 |
"model.layers.21.mlp.gate_proj": {
|
@@ -2415,7 +2415,7 @@
|
|
2415 |
"outlier_size": 0,
|
2416 |
"vector_lens": [
|
2417 |
-1,
|
2418 |
-
|
2419 |
]
|
2420 |
},
|
2421 |
"model.layers.21.mlp.up_proj": {
|
@@ -2439,7 +2439,7 @@
|
|
2439 |
"outlier_size": 0,
|
2440 |
"vector_lens": [
|
2441 |
-1,
|
2442 |
-
|
2443 |
]
|
2444 |
},
|
2445 |
"model.layers.21.self_attn.k_proj": {
|
@@ -2463,7 +2463,7 @@
|
|
2463 |
"outlier_size": 0,
|
2464 |
"vector_lens": [
|
2465 |
-1,
|
2466 |
-
|
2467 |
]
|
2468 |
},
|
2469 |
"model.layers.21.self_attn.o_proj": {
|
@@ -2487,7 +2487,7 @@
|
|
2487 |
"outlier_size": 0,
|
2488 |
"vector_lens": [
|
2489 |
-1,
|
2490 |
-
|
2491 |
]
|
2492 |
},
|
2493 |
"model.layers.21.self_attn.q_proj": {
|
@@ -2511,7 +2511,7 @@
|
|
2511 |
"outlier_size": 0,
|
2512 |
"vector_lens": [
|
2513 |
-1,
|
2514 |
-
|
2515 |
]
|
2516 |
},
|
2517 |
"model.layers.21.self_attn.v_proj": {
|
@@ -2535,7 +2535,7 @@
|
|
2535 |
"outlier_size": 0,
|
2536 |
"vector_lens": [
|
2537 |
-1,
|
2538 |
-
|
2539 |
]
|
2540 |
},
|
2541 |
"model.layers.22.mlp.down_proj": {
|
@@ -2559,7 +2559,7 @@
|
|
2559 |
"outlier_size": 0,
|
2560 |
"vector_lens": [
|
2561 |
-1,
|
2562 |
-
|
2563 |
]
|
2564 |
},
|
2565 |
"model.layers.22.mlp.gate_proj": {
|
@@ -2583,7 +2583,7 @@
|
|
2583 |
"outlier_size": 0,
|
2584 |
"vector_lens": [
|
2585 |
-1,
|
2586 |
-
|
2587 |
]
|
2588 |
},
|
2589 |
"model.layers.22.mlp.up_proj": {
|
@@ -2607,7 +2607,7 @@
|
|
2607 |
"outlier_size": 0,
|
2608 |
"vector_lens": [
|
2609 |
-1,
|
2610 |
-
|
2611 |
]
|
2612 |
},
|
2613 |
"model.layers.22.self_attn.k_proj": {
|
@@ -2631,7 +2631,7 @@
|
|
2631 |
"outlier_size": 0,
|
2632 |
"vector_lens": [
|
2633 |
-1,
|
2634 |
-
|
2635 |
]
|
2636 |
},
|
2637 |
"model.layers.22.self_attn.o_proj": {
|
@@ -2655,7 +2655,7 @@
|
|
2655 |
"outlier_size": 0,
|
2656 |
"vector_lens": [
|
2657 |
-1,
|
2658 |
-
|
2659 |
]
|
2660 |
},
|
2661 |
"model.layers.22.self_attn.q_proj": {
|
@@ -2679,7 +2679,7 @@
|
|
2679 |
"outlier_size": 0,
|
2680 |
"vector_lens": [
|
2681 |
-1,
|
2682 |
-
|
2683 |
]
|
2684 |
},
|
2685 |
"model.layers.22.self_attn.v_proj": {
|
@@ -2703,7 +2703,7 @@
|
|
2703 |
"outlier_size": 0,
|
2704 |
"vector_lens": [
|
2705 |
-1,
|
2706 |
-
|
2707 |
]
|
2708 |
},
|
2709 |
"model.layers.23.mlp.down_proj": {
|
@@ -2727,7 +2727,7 @@
|
|
2727 |
"outlier_size": 0,
|
2728 |
"vector_lens": [
|
2729 |
-1,
|
2730 |
-
|
2731 |
]
|
2732 |
},
|
2733 |
"model.layers.23.mlp.gate_proj": {
|
@@ -2751,7 +2751,7 @@
|
|
2751 |
"outlier_size": 0,
|
2752 |
"vector_lens": [
|
2753 |
-1,
|
2754 |
-
|
2755 |
]
|
2756 |
},
|
2757 |
"model.layers.23.mlp.up_proj": {
|
@@ -2775,7 +2775,7 @@
|
|
2775 |
"outlier_size": 0,
|
2776 |
"vector_lens": [
|
2777 |
-1,
|
2778 |
-
|
2779 |
]
|
2780 |
},
|
2781 |
"model.layers.23.self_attn.k_proj": {
|
@@ -2799,7 +2799,7 @@
|
|
2799 |
"outlier_size": 0,
|
2800 |
"vector_lens": [
|
2801 |
-1,
|
2802 |
-
|
2803 |
]
|
2804 |
},
|
2805 |
"model.layers.23.self_attn.o_proj": {
|
@@ -2823,7 +2823,7 @@
|
|
2823 |
"outlier_size": 0,
|
2824 |
"vector_lens": [
|
2825 |
-1,
|
2826 |
-
|
2827 |
]
|
2828 |
},
|
2829 |
"model.layers.23.self_attn.q_proj": {
|
@@ -2847,7 +2847,7 @@
|
|
2847 |
"outlier_size": 0,
|
2848 |
"vector_lens": [
|
2849 |
-1,
|
2850 |
-
|
2851 |
]
|
2852 |
},
|
2853 |
"model.layers.23.self_attn.v_proj": {
|
@@ -2871,7 +2871,7 @@
|
|
2871 |
"outlier_size": 0,
|
2872 |
"vector_lens": [
|
2873 |
-1,
|
2874 |
-
|
2875 |
]
|
2876 |
},
|
2877 |
"model.layers.24.mlp.down_proj": {
|
@@ -2895,7 +2895,7 @@
|
|
2895 |
"outlier_size": 0,
|
2896 |
"vector_lens": [
|
2897 |
-1,
|
2898 |
-
|
2899 |
]
|
2900 |
},
|
2901 |
"model.layers.24.mlp.gate_proj": {
|
@@ -2919,7 +2919,7 @@
|
|
2919 |
"outlier_size": 0,
|
2920 |
"vector_lens": [
|
2921 |
-1,
|
2922 |
-
|
2923 |
]
|
2924 |
},
|
2925 |
"model.layers.24.mlp.up_proj": {
|
@@ -2943,7 +2943,7 @@
|
|
2943 |
"outlier_size": 0,
|
2944 |
"vector_lens": [
|
2945 |
-1,
|
2946 |
-
|
2947 |
]
|
2948 |
},
|
2949 |
"model.layers.24.self_attn.k_proj": {
|
@@ -2967,7 +2967,7 @@
|
|
2967 |
"outlier_size": 0,
|
2968 |
"vector_lens": [
|
2969 |
-1,
|
2970 |
-
|
2971 |
]
|
2972 |
},
|
2973 |
"model.layers.24.self_attn.o_proj": {
|
@@ -2991,7 +2991,7 @@
|
|
2991 |
"outlier_size": 0,
|
2992 |
"vector_lens": [
|
2993 |
-1,
|
2994 |
-
|
2995 |
]
|
2996 |
},
|
2997 |
"model.layers.24.self_attn.q_proj": {
|
@@ -3015,7 +3015,7 @@
|
|
3015 |
"outlier_size": 0,
|
3016 |
"vector_lens": [
|
3017 |
-1,
|
3018 |
-
|
3019 |
]
|
3020 |
},
|
3021 |
"model.layers.24.self_attn.v_proj": {
|
@@ -3039,7 +3039,7 @@
|
|
3039 |
"outlier_size": 0,
|
3040 |
"vector_lens": [
|
3041 |
-1,
|
3042 |
-
|
3043 |
]
|
3044 |
},
|
3045 |
"model.layers.25.mlp.down_proj": {
|
@@ -3063,7 +3063,7 @@
|
|
3063 |
"outlier_size": 0,
|
3064 |
"vector_lens": [
|
3065 |
-1,
|
3066 |
-
|
3067 |
]
|
3068 |
},
|
3069 |
"model.layers.25.mlp.gate_proj": {
|
@@ -3087,7 +3087,7 @@
|
|
3087 |
"outlier_size": 0,
|
3088 |
"vector_lens": [
|
3089 |
-1,
|
3090 |
-
|
3091 |
]
|
3092 |
},
|
3093 |
"model.layers.25.mlp.up_proj": {
|
@@ -3111,7 +3111,7 @@
|
|
3111 |
"outlier_size": 0,
|
3112 |
"vector_lens": [
|
3113 |
-1,
|
3114 |
-
|
3115 |
]
|
3116 |
},
|
3117 |
"model.layers.25.self_attn.k_proj": {
|
@@ -3135,7 +3135,7 @@
|
|
3135 |
"outlier_size": 0,
|
3136 |
"vector_lens": [
|
3137 |
-1,
|
3138 |
-
|
3139 |
]
|
3140 |
},
|
3141 |
"model.layers.25.self_attn.o_proj": {
|
@@ -3159,7 +3159,7 @@
|
|
3159 |
"outlier_size": 0,
|
3160 |
"vector_lens": [
|
3161 |
-1,
|
3162 |
-
|
3163 |
]
|
3164 |
},
|
3165 |
"model.layers.25.self_attn.q_proj": {
|
@@ -3183,7 +3183,7 @@
|
|
3183 |
"outlier_size": 0,
|
3184 |
"vector_lens": [
|
3185 |
-1,
|
3186 |
-
|
3187 |
]
|
3188 |
},
|
3189 |
"model.layers.25.self_attn.v_proj": {
|
@@ -3207,7 +3207,7 @@
|
|
3207 |
"outlier_size": 0,
|
3208 |
"vector_lens": [
|
3209 |
-1,
|
3210 |
-
|
3211 |
]
|
3212 |
},
|
3213 |
"model.layers.26.mlp.down_proj": {
|
@@ -3231,7 +3231,7 @@
|
|
3231 |
"outlier_size": 0,
|
3232 |
"vector_lens": [
|
3233 |
-1,
|
3234 |
-
|
3235 |
]
|
3236 |
},
|
3237 |
"model.layers.26.mlp.gate_proj": {
|
@@ -3255,7 +3255,7 @@
|
|
3255 |
"outlier_size": 0,
|
3256 |
"vector_lens": [
|
3257 |
-1,
|
3258 |
-
|
3259 |
]
|
3260 |
},
|
3261 |
"model.layers.26.mlp.up_proj": {
|
@@ -3279,7 +3279,7 @@
|
|
3279 |
"outlier_size": 0,
|
3280 |
"vector_lens": [
|
3281 |
-1,
|
3282 |
-
|
3283 |
]
|
3284 |
},
|
3285 |
"model.layers.26.self_attn.k_proj": {
|
@@ -3303,7 +3303,7 @@
|
|
3303 |
"outlier_size": 0,
|
3304 |
"vector_lens": [
|
3305 |
-1,
|
3306 |
-
|
3307 |
]
|
3308 |
},
|
3309 |
"model.layers.26.self_attn.o_proj": {
|
@@ -3327,7 +3327,7 @@
|
|
3327 |
"outlier_size": 0,
|
3328 |
"vector_lens": [
|
3329 |
-1,
|
3330 |
-
|
3331 |
]
|
3332 |
},
|
3333 |
"model.layers.26.self_attn.q_proj": {
|
@@ -3351,7 +3351,7 @@
|
|
3351 |
"outlier_size": 0,
|
3352 |
"vector_lens": [
|
3353 |
-1,
|
3354 |
-
|
3355 |
]
|
3356 |
},
|
3357 |
"model.layers.26.self_attn.v_proj": {
|
@@ -3375,7 +3375,7 @@
|
|
3375 |
"outlier_size": 0,
|
3376 |
"vector_lens": [
|
3377 |
-1,
|
3378 |
-
|
3379 |
]
|
3380 |
},
|
3381 |
"model.layers.27.mlp.down_proj": {
|
@@ -3399,7 +3399,7 @@
|
|
3399 |
"outlier_size": 0,
|
3400 |
"vector_lens": [
|
3401 |
-1,
|
3402 |
-
|
3403 |
]
|
3404 |
},
|
3405 |
"model.layers.27.mlp.gate_proj": {
|
@@ -3423,7 +3423,7 @@
|
|
3423 |
"outlier_size": 0,
|
3424 |
"vector_lens": [
|
3425 |
-1,
|
3426 |
-
|
3427 |
]
|
3428 |
},
|
3429 |
"model.layers.27.mlp.up_proj": {
|
@@ -3447,7 +3447,7 @@
|
|
3447 |
"outlier_size": 0,
|
3448 |
"vector_lens": [
|
3449 |
-1,
|
3450 |
-
|
3451 |
]
|
3452 |
},
|
3453 |
"model.layers.27.self_attn.k_proj": {
|
@@ -3471,7 +3471,7 @@
|
|
3471 |
"outlier_size": 0,
|
3472 |
"vector_lens": [
|
3473 |
-1,
|
3474 |
-
|
3475 |
]
|
3476 |
},
|
3477 |
"model.layers.27.self_attn.o_proj": {
|
@@ -3495,7 +3495,7 @@
|
|
3495 |
"outlier_size": 0,
|
3496 |
"vector_lens": [
|
3497 |
-1,
|
3498 |
-
|
3499 |
]
|
3500 |
},
|
3501 |
"model.layers.27.self_attn.q_proj": {
|
@@ -3519,7 +3519,7 @@
|
|
3519 |
"outlier_size": 0,
|
3520 |
"vector_lens": [
|
3521 |
-1,
|
3522 |
-
|
3523 |
]
|
3524 |
},
|
3525 |
"model.layers.27.self_attn.v_proj": {
|
@@ -3543,7 +3543,7 @@
|
|
3543 |
"outlier_size": 0,
|
3544 |
"vector_lens": [
|
3545 |
-1,
|
3546 |
-
|
3547 |
]
|
3548 |
},
|
3549 |
"model.layers.28.mlp.down_proj": {
|
@@ -3567,7 +3567,7 @@
|
|
3567 |
"outlier_size": 0,
|
3568 |
"vector_lens": [
|
3569 |
-1,
|
3570 |
-
|
3571 |
]
|
3572 |
},
|
3573 |
"model.layers.28.mlp.gate_proj": {
|
@@ -3591,7 +3591,7 @@
|
|
3591 |
"outlier_size": 0,
|
3592 |
"vector_lens": [
|
3593 |
-1,
|
3594 |
-
|
3595 |
]
|
3596 |
},
|
3597 |
"model.layers.28.mlp.up_proj": {
|
@@ -3615,7 +3615,7 @@
|
|
3615 |
"outlier_size": 0,
|
3616 |
"vector_lens": [
|
3617 |
-1,
|
3618 |
-
|
3619 |
]
|
3620 |
},
|
3621 |
"model.layers.28.self_attn.k_proj": {
|
@@ -3639,7 +3639,7 @@
|
|
3639 |
"outlier_size": 0,
|
3640 |
"vector_lens": [
|
3641 |
-1,
|
3642 |
-
|
3643 |
]
|
3644 |
},
|
3645 |
"model.layers.28.self_attn.o_proj": {
|
@@ -3663,7 +3663,7 @@
|
|
3663 |
"outlier_size": 0,
|
3664 |
"vector_lens": [
|
3665 |
-1,
|
3666 |
-
|
3667 |
]
|
3668 |
},
|
3669 |
"model.layers.28.self_attn.q_proj": {
|
@@ -3687,7 +3687,7 @@
|
|
3687 |
"outlier_size": 0,
|
3688 |
"vector_lens": [
|
3689 |
-1,
|
3690 |
-
|
3691 |
]
|
3692 |
},
|
3693 |
"model.layers.28.self_attn.v_proj": {
|
@@ -3711,7 +3711,7 @@
|
|
3711 |
"outlier_size": 0,
|
3712 |
"vector_lens": [
|
3713 |
-1,
|
3714 |
-
|
3715 |
]
|
3716 |
},
|
3717 |
"model.layers.29.mlp.down_proj": {
|
@@ -3735,7 +3735,7 @@
|
|
3735 |
"outlier_size": 0,
|
3736 |
"vector_lens": [
|
3737 |
-1,
|
3738 |
-
|
3739 |
]
|
3740 |
},
|
3741 |
"model.layers.29.mlp.gate_proj": {
|
@@ -3759,7 +3759,7 @@
|
|
3759 |
"outlier_size": 0,
|
3760 |
"vector_lens": [
|
3761 |
-1,
|
3762 |
-
|
3763 |
]
|
3764 |
},
|
3765 |
"model.layers.29.mlp.up_proj": {
|
@@ -3783,7 +3783,7 @@
|
|
3783 |
"outlier_size": 0,
|
3784 |
"vector_lens": [
|
3785 |
-1,
|
3786 |
-
|
3787 |
]
|
3788 |
},
|
3789 |
"model.layers.29.self_attn.k_proj": {
|
@@ -3807,7 +3807,7 @@
|
|
3807 |
"outlier_size": 0,
|
3808 |
"vector_lens": [
|
3809 |
-1,
|
3810 |
-
|
3811 |
]
|
3812 |
},
|
3813 |
"model.layers.29.self_attn.o_proj": {
|
@@ -3831,7 +3831,7 @@
|
|
3831 |
"outlier_size": 0,
|
3832 |
"vector_lens": [
|
3833 |
-1,
|
3834 |
-
|
3835 |
]
|
3836 |
},
|
3837 |
"model.layers.29.self_attn.q_proj": {
|
@@ -3855,7 +3855,7 @@
|
|
3855 |
"outlier_size": 0,
|
3856 |
"vector_lens": [
|
3857 |
-1,
|
3858 |
-
|
3859 |
]
|
3860 |
},
|
3861 |
"model.layers.29.self_attn.v_proj": {
|
@@ -3879,7 +3879,7 @@
|
|
3879 |
"outlier_size": 0,
|
3880 |
"vector_lens": [
|
3881 |
-1,
|
3882 |
-
|
3883 |
]
|
3884 |
},
|
3885 |
"model.layers.3.mlp.down_proj": {
|
@@ -3903,7 +3903,7 @@
|
|
3903 |
"outlier_size": 0,
|
3904 |
"vector_lens": [
|
3905 |
-1,
|
3906 |
-
|
3907 |
]
|
3908 |
},
|
3909 |
"model.layers.3.mlp.gate_proj": {
|
@@ -3927,7 +3927,7 @@
|
|
3927 |
"outlier_size": 0,
|
3928 |
"vector_lens": [
|
3929 |
-1,
|
3930 |
-
|
3931 |
]
|
3932 |
},
|
3933 |
"model.layers.3.mlp.up_proj": {
|
@@ -3951,7 +3951,7 @@
|
|
3951 |
"outlier_size": 0,
|
3952 |
"vector_lens": [
|
3953 |
-1,
|
3954 |
-
|
3955 |
]
|
3956 |
},
|
3957 |
"model.layers.3.self_attn.k_proj": {
|
@@ -3975,7 +3975,7 @@
|
|
3975 |
"outlier_size": 0,
|
3976 |
"vector_lens": [
|
3977 |
-1,
|
3978 |
-
|
3979 |
]
|
3980 |
},
|
3981 |
"model.layers.3.self_attn.o_proj": {
|
@@ -3999,7 +3999,7 @@
|
|
3999 |
"outlier_size": 0,
|
4000 |
"vector_lens": [
|
4001 |
-1,
|
4002 |
-
|
4003 |
]
|
4004 |
},
|
4005 |
"model.layers.3.self_attn.q_proj": {
|
@@ -4023,7 +4023,7 @@
|
|
4023 |
"outlier_size": 0,
|
4024 |
"vector_lens": [
|
4025 |
-1,
|
4026 |
-
|
4027 |
]
|
4028 |
},
|
4029 |
"model.layers.3.self_attn.v_proj": {
|
@@ -4047,7 +4047,7 @@
|
|
4047 |
"outlier_size": 0,
|
4048 |
"vector_lens": [
|
4049 |
-1,
|
4050 |
-
|
4051 |
]
|
4052 |
},
|
4053 |
"model.layers.30.mlp.down_proj": {
|
@@ -4071,7 +4071,7 @@
|
|
4071 |
"outlier_size": 0,
|
4072 |
"vector_lens": [
|
4073 |
-1,
|
4074 |
-
|
4075 |
]
|
4076 |
},
|
4077 |
"model.layers.30.mlp.gate_proj": {
|
@@ -4095,7 +4095,7 @@
|
|
4095 |
"outlier_size": 0,
|
4096 |
"vector_lens": [
|
4097 |
-1,
|
4098 |
-
|
4099 |
]
|
4100 |
},
|
4101 |
"model.layers.30.mlp.up_proj": {
|
@@ -4119,7 +4119,7 @@
|
|
4119 |
"outlier_size": 0,
|
4120 |
"vector_lens": [
|
4121 |
-1,
|
4122 |
-
|
4123 |
]
|
4124 |
},
|
4125 |
"model.layers.30.self_attn.k_proj": {
|
@@ -4143,7 +4143,7 @@
|
|
4143 |
"outlier_size": 0,
|
4144 |
"vector_lens": [
|
4145 |
-1,
|
4146 |
-
|
4147 |
]
|
4148 |
},
|
4149 |
"model.layers.30.self_attn.o_proj": {
|
@@ -4167,7 +4167,7 @@
|
|
4167 |
"outlier_size": 0,
|
4168 |
"vector_lens": [
|
4169 |
-1,
|
4170 |
-
|
4171 |
]
|
4172 |
},
|
4173 |
"model.layers.30.self_attn.q_proj": {
|
@@ -4191,7 +4191,7 @@
|
|
4191 |
"outlier_size": 0,
|
4192 |
"vector_lens": [
|
4193 |
-1,
|
4194 |
-
|
4195 |
]
|
4196 |
},
|
4197 |
"model.layers.30.self_attn.v_proj": {
|
@@ -4215,7 +4215,7 @@
|
|
4215 |
"outlier_size": 0,
|
4216 |
"vector_lens": [
|
4217 |
-1,
|
4218 |
-
|
4219 |
]
|
4220 |
},
|
4221 |
"model.layers.31.mlp.down_proj": {
|
@@ -4239,7 +4239,7 @@
|
|
4239 |
"outlier_size": 0,
|
4240 |
"vector_lens": [
|
4241 |
-1,
|
4242 |
-
|
4243 |
]
|
4244 |
},
|
4245 |
"model.layers.31.mlp.gate_proj": {
|
@@ -4263,7 +4263,7 @@
|
|
4263 |
"outlier_size": 0,
|
4264 |
"vector_lens": [
|
4265 |
-1,
|
4266 |
-
|
4267 |
]
|
4268 |
},
|
4269 |
"model.layers.31.mlp.up_proj": {
|
@@ -4287,7 +4287,7 @@
|
|
4287 |
"outlier_size": 0,
|
4288 |
"vector_lens": [
|
4289 |
-1,
|
4290 |
-
|
4291 |
]
|
4292 |
},
|
4293 |
"model.layers.31.self_attn.k_proj": {
|
@@ -4311,7 +4311,7 @@
|
|
4311 |
"outlier_size": 0,
|
4312 |
"vector_lens": [
|
4313 |
-1,
|
4314 |
-
|
4315 |
]
|
4316 |
},
|
4317 |
"model.layers.31.self_attn.o_proj": {
|
@@ -4335,7 +4335,7 @@
|
|
4335 |
"outlier_size": 0,
|
4336 |
"vector_lens": [
|
4337 |
-1,
|
4338 |
-
|
4339 |
]
|
4340 |
},
|
4341 |
"model.layers.31.self_attn.q_proj": {
|
@@ -4359,7 +4359,7 @@
|
|
4359 |
"outlier_size": 0,
|
4360 |
"vector_lens": [
|
4361 |
-1,
|
4362 |
-
|
4363 |
]
|
4364 |
},
|
4365 |
"model.layers.31.self_attn.v_proj": {
|
@@ -4383,7 +4383,7 @@
|
|
4383 |
"outlier_size": 0,
|
4384 |
"vector_lens": [
|
4385 |
-1,
|
4386 |
-
|
4387 |
]
|
4388 |
},
|
4389 |
"model.layers.4.mlp.down_proj": {
|
@@ -4407,7 +4407,7 @@
|
|
4407 |
"outlier_size": 0,
|
4408 |
"vector_lens": [
|
4409 |
-1,
|
4410 |
-
|
4411 |
]
|
4412 |
},
|
4413 |
"model.layers.4.mlp.gate_proj": {
|
@@ -4431,7 +4431,7 @@
|
|
4431 |
"outlier_size": 0,
|
4432 |
"vector_lens": [
|
4433 |
-1,
|
4434 |
-
|
4435 |
]
|
4436 |
},
|
4437 |
"model.layers.4.mlp.up_proj": {
|
@@ -4455,7 +4455,7 @@
|
|
4455 |
"outlier_size": 0,
|
4456 |
"vector_lens": [
|
4457 |
-1,
|
4458 |
-
|
4459 |
]
|
4460 |
},
|
4461 |
"model.layers.4.self_attn.k_proj": {
|
@@ -4479,7 +4479,7 @@
|
|
4479 |
"outlier_size": 0,
|
4480 |
"vector_lens": [
|
4481 |
-1,
|
4482 |
-
|
4483 |
]
|
4484 |
},
|
4485 |
"model.layers.4.self_attn.o_proj": {
|
@@ -4503,7 +4503,7 @@
|
|
4503 |
"outlier_size": 0,
|
4504 |
"vector_lens": [
|
4505 |
-1,
|
4506 |
-
|
4507 |
]
|
4508 |
},
|
4509 |
"model.layers.4.self_attn.q_proj": {
|
@@ -4527,7 +4527,7 @@
|
|
4527 |
"outlier_size": 0,
|
4528 |
"vector_lens": [
|
4529 |
-1,
|
4530 |
-
|
4531 |
]
|
4532 |
},
|
4533 |
"model.layers.4.self_attn.v_proj": {
|
@@ -4551,7 +4551,7 @@
|
|
4551 |
"outlier_size": 0,
|
4552 |
"vector_lens": [
|
4553 |
-1,
|
4554 |
-
|
4555 |
]
|
4556 |
},
|
4557 |
"model.layers.5.mlp.down_proj": {
|
@@ -4575,7 +4575,7 @@
|
|
4575 |
"outlier_size": 0,
|
4576 |
"vector_lens": [
|
4577 |
-1,
|
4578 |
-
|
4579 |
]
|
4580 |
},
|
4581 |
"model.layers.5.mlp.gate_proj": {
|
@@ -4599,7 +4599,7 @@
|
|
4599 |
"outlier_size": 0,
|
4600 |
"vector_lens": [
|
4601 |
-1,
|
4602 |
-
|
4603 |
]
|
4604 |
},
|
4605 |
"model.layers.5.mlp.up_proj": {
|
@@ -4623,7 +4623,7 @@
|
|
4623 |
"outlier_size": 0,
|
4624 |
"vector_lens": [
|
4625 |
-1,
|
4626 |
-
|
4627 |
]
|
4628 |
},
|
4629 |
"model.layers.5.self_attn.k_proj": {
|
@@ -4647,7 +4647,7 @@
|
|
4647 |
"outlier_size": 0,
|
4648 |
"vector_lens": [
|
4649 |
-1,
|
4650 |
-
|
4651 |
]
|
4652 |
},
|
4653 |
"model.layers.5.self_attn.o_proj": {
|
@@ -4671,7 +4671,7 @@
|
|
4671 |
"outlier_size": 0,
|
4672 |
"vector_lens": [
|
4673 |
-1,
|
4674 |
-
|
4675 |
]
|
4676 |
},
|
4677 |
"model.layers.5.self_attn.q_proj": {
|
@@ -4695,7 +4695,7 @@
|
|
4695 |
"outlier_size": 0,
|
4696 |
"vector_lens": [
|
4697 |
-1,
|
4698 |
-
|
4699 |
]
|
4700 |
},
|
4701 |
"model.layers.5.self_attn.v_proj": {
|
@@ -4719,7 +4719,7 @@
|
|
4719 |
"outlier_size": 0,
|
4720 |
"vector_lens": [
|
4721 |
-1,
|
4722 |
-
|
4723 |
]
|
4724 |
},
|
4725 |
"model.layers.6.mlp.down_proj": {
|
@@ -4743,7 +4743,7 @@
|
|
4743 |
"outlier_size": 0,
|
4744 |
"vector_lens": [
|
4745 |
-1,
|
4746 |
-
|
4747 |
]
|
4748 |
},
|
4749 |
"model.layers.6.mlp.gate_proj": {
|
@@ -4767,7 +4767,7 @@
|
|
4767 |
"outlier_size": 0,
|
4768 |
"vector_lens": [
|
4769 |
-1,
|
4770 |
-
|
4771 |
]
|
4772 |
},
|
4773 |
"model.layers.6.mlp.up_proj": {
|
@@ -4791,7 +4791,7 @@
|
|
4791 |
"outlier_size": 0,
|
4792 |
"vector_lens": [
|
4793 |
-1,
|
4794 |
-
|
4795 |
]
|
4796 |
},
|
4797 |
"model.layers.6.self_attn.k_proj": {
|
@@ -4815,7 +4815,7 @@
|
|
4815 |
"outlier_size": 0,
|
4816 |
"vector_lens": [
|
4817 |
-1,
|
4818 |
-
|
4819 |
]
|
4820 |
},
|
4821 |
"model.layers.6.self_attn.o_proj": {
|
@@ -4839,7 +4839,7 @@
|
|
4839 |
"outlier_size": 0,
|
4840 |
"vector_lens": [
|
4841 |
-1,
|
4842 |
-
|
4843 |
]
|
4844 |
},
|
4845 |
"model.layers.6.self_attn.q_proj": {
|
@@ -4863,7 +4863,7 @@
|
|
4863 |
"outlier_size": 0,
|
4864 |
"vector_lens": [
|
4865 |
-1,
|
4866 |
-
|
4867 |
]
|
4868 |
},
|
4869 |
"model.layers.6.self_attn.v_proj": {
|
@@ -4887,7 +4887,7 @@
|
|
4887 |
"outlier_size": 0,
|
4888 |
"vector_lens": [
|
4889 |
-1,
|
4890 |
-
|
4891 |
]
|
4892 |
},
|
4893 |
"model.layers.7.mlp.down_proj": {
|
@@ -4911,7 +4911,7 @@
|
|
4911 |
"outlier_size": 0,
|
4912 |
"vector_lens": [
|
4913 |
-1,
|
4914 |
-
|
4915 |
]
|
4916 |
},
|
4917 |
"model.layers.7.mlp.gate_proj": {
|
@@ -4935,7 +4935,7 @@
|
|
4935 |
"outlier_size": 0,
|
4936 |
"vector_lens": [
|
4937 |
-1,
|
4938 |
-
|
4939 |
]
|
4940 |
},
|
4941 |
"model.layers.7.mlp.up_proj": {
|
@@ -4959,7 +4959,7 @@
|
|
4959 |
"outlier_size": 0,
|
4960 |
"vector_lens": [
|
4961 |
-1,
|
4962 |
-
|
4963 |
]
|
4964 |
},
|
4965 |
"model.layers.7.self_attn.k_proj": {
|
@@ -4983,7 +4983,7 @@
|
|
4983 |
"outlier_size": 0,
|
4984 |
"vector_lens": [
|
4985 |
-1,
|
4986 |
-
|
4987 |
]
|
4988 |
},
|
4989 |
"model.layers.7.self_attn.o_proj": {
|
@@ -5007,7 +5007,7 @@
|
|
5007 |
"outlier_size": 0,
|
5008 |
"vector_lens": [
|
5009 |
-1,
|
5010 |
-
|
5011 |
]
|
5012 |
},
|
5013 |
"model.layers.7.self_attn.q_proj": {
|
@@ -5031,7 +5031,7 @@
|
|
5031 |
"outlier_size": 0,
|
5032 |
"vector_lens": [
|
5033 |
-1,
|
5034 |
-
|
5035 |
]
|
5036 |
},
|
5037 |
"model.layers.7.self_attn.v_proj": {
|
@@ -5055,7 +5055,7 @@
|
|
5055 |
"outlier_size": 0,
|
5056 |
"vector_lens": [
|
5057 |
-1,
|
5058 |
-
|
5059 |
]
|
5060 |
},
|
5061 |
"model.layers.8.mlp.down_proj": {
|
@@ -5079,7 +5079,7 @@
|
|
5079 |
"outlier_size": 0,
|
5080 |
"vector_lens": [
|
5081 |
-1,
|
5082 |
-
|
5083 |
]
|
5084 |
},
|
5085 |
"model.layers.8.mlp.gate_proj": {
|
@@ -5103,7 +5103,7 @@
|
|
5103 |
"outlier_size": 0,
|
5104 |
"vector_lens": [
|
5105 |
-1,
|
5106 |
-
|
5107 |
]
|
5108 |
},
|
5109 |
"model.layers.8.mlp.up_proj": {
|
@@ -5127,7 +5127,7 @@
|
|
5127 |
"outlier_size": 0,
|
5128 |
"vector_lens": [
|
5129 |
-1,
|
5130 |
-
|
5131 |
]
|
5132 |
},
|
5133 |
"model.layers.8.self_attn.k_proj": {
|
@@ -5151,7 +5151,7 @@
|
|
5151 |
"outlier_size": 0,
|
5152 |
"vector_lens": [
|
5153 |
-1,
|
5154 |
-
|
5155 |
]
|
5156 |
},
|
5157 |
"model.layers.8.self_attn.o_proj": {
|
@@ -5175,7 +5175,7 @@
|
|
5175 |
"outlier_size": 0,
|
5176 |
"vector_lens": [
|
5177 |
-1,
|
5178 |
-
|
5179 |
]
|
5180 |
},
|
5181 |
"model.layers.8.self_attn.q_proj": {
|
@@ -5199,7 +5199,7 @@
|
|
5199 |
"outlier_size": 0,
|
5200 |
"vector_lens": [
|
5201 |
-1,
|
5202 |
-
|
5203 |
]
|
5204 |
},
|
5205 |
"model.layers.8.self_attn.v_proj": {
|
@@ -5223,7 +5223,7 @@
|
|
5223 |
"outlier_size": 0,
|
5224 |
"vector_lens": [
|
5225 |
-1,
|
5226 |
-
|
5227 |
]
|
5228 |
},
|
5229 |
"model.layers.9.mlp.down_proj": {
|
@@ -5247,7 +5247,7 @@
|
|
5247 |
"outlier_size": 0,
|
5248 |
"vector_lens": [
|
5249 |
-1,
|
5250 |
-
|
5251 |
]
|
5252 |
},
|
5253 |
"model.layers.9.mlp.gate_proj": {
|
@@ -5271,7 +5271,7 @@
|
|
5271 |
"outlier_size": 0,
|
5272 |
"vector_lens": [
|
5273 |
-1,
|
5274 |
-
|
5275 |
]
|
5276 |
},
|
5277 |
"model.layers.9.mlp.up_proj": {
|
@@ -5295,7 +5295,7 @@
|
|
5295 |
"outlier_size": 0,
|
5296 |
"vector_lens": [
|
5297 |
-1,
|
5298 |
-
|
5299 |
]
|
5300 |
},
|
5301 |
"model.layers.9.self_attn.k_proj": {
|
@@ -5319,7 +5319,7 @@
|
|
5319 |
"outlier_size": 0,
|
5320 |
"vector_lens": [
|
5321 |
-1,
|
5322 |
-
|
5323 |
]
|
5324 |
},
|
5325 |
"model.layers.9.self_attn.o_proj": {
|
@@ -5343,7 +5343,7 @@
|
|
5343 |
"outlier_size": 0,
|
5344 |
"vector_lens": [
|
5345 |
-1,
|
5346 |
-
|
5347 |
]
|
5348 |
},
|
5349 |
"model.layers.9.self_attn.q_proj": {
|
@@ -5367,7 +5367,7 @@
|
|
5367 |
"outlier_size": 0,
|
5368 |
"vector_lens": [
|
5369 |
-1,
|
5370 |
-
|
5371 |
]
|
5372 |
},
|
5373 |
"model.layers.9.self_attn.v_proj": {
|
@@ -5391,7 +5391,7 @@
|
|
5391 |
"outlier_size": 0,
|
5392 |
"vector_lens": [
|
5393 |
-1,
|
5394 |
-
|
5395 |
]
|
5396 |
}
|
5397 |
},
|
|
|
39 |
"outlier_size": 0,
|
40 |
"vector_lens": [
|
41 |
-1,
|
42 |
+
4
|
43 |
]
|
44 |
},
|
45 |
"model.layers.0.mlp.gate_proj": {
|
|
|
63 |
"outlier_size": 0,
|
64 |
"vector_lens": [
|
65 |
-1,
|
66 |
+
4
|
67 |
]
|
68 |
},
|
69 |
"model.layers.0.mlp.up_proj": {
|
|
|
87 |
"outlier_size": 0,
|
88 |
"vector_lens": [
|
89 |
-1,
|
90 |
+
4
|
91 |
]
|
92 |
},
|
93 |
"model.layers.0.self_attn.k_proj": {
|
|
|
111 |
"outlier_size": 0,
|
112 |
"vector_lens": [
|
113 |
-1,
|
114 |
+
4
|
115 |
]
|
116 |
},
|
117 |
"model.layers.0.self_attn.o_proj": {
|
|
|
135 |
"outlier_size": 0,
|
136 |
"vector_lens": [
|
137 |
-1,
|
138 |
+
4
|
139 |
]
|
140 |
},
|
141 |
"model.layers.0.self_attn.q_proj": {
|
|
|
159 |
"outlier_size": 0,
|
160 |
"vector_lens": [
|
161 |
-1,
|
162 |
+
4
|
163 |
]
|
164 |
},
|
165 |
"model.layers.0.self_attn.v_proj": {
|
|
|
183 |
"outlier_size": 0,
|
184 |
"vector_lens": [
|
185 |
-1,
|
186 |
+
4
|
187 |
]
|
188 |
},
|
189 |
"model.layers.1.mlp.down_proj": {
|
|
|
207 |
"outlier_size": 0,
|
208 |
"vector_lens": [
|
209 |
-1,
|
210 |
+
4
|
211 |
]
|
212 |
},
|
213 |
"model.layers.1.mlp.gate_proj": {
|
|
|
231 |
"outlier_size": 0,
|
232 |
"vector_lens": [
|
233 |
-1,
|
234 |
+
4
|
235 |
]
|
236 |
},
|
237 |
"model.layers.1.mlp.up_proj": {
|
|
|
255 |
"outlier_size": 0,
|
256 |
"vector_lens": [
|
257 |
-1,
|
258 |
+
4
|
259 |
]
|
260 |
},
|
261 |
"model.layers.1.self_attn.k_proj": {
|
|
|
279 |
"outlier_size": 0,
|
280 |
"vector_lens": [
|
281 |
-1,
|
282 |
+
4
|
283 |
]
|
284 |
},
|
285 |
"model.layers.1.self_attn.o_proj": {
|
|
|
303 |
"outlier_size": 0,
|
304 |
"vector_lens": [
|
305 |
-1,
|
306 |
+
4
|
307 |
]
|
308 |
},
|
309 |
"model.layers.1.self_attn.q_proj": {
|
|
|
327 |
"outlier_size": 0,
|
328 |
"vector_lens": [
|
329 |
-1,
|
330 |
+
4
|
331 |
]
|
332 |
},
|
333 |
"model.layers.1.self_attn.v_proj": {
|
|
|
351 |
"outlier_size": 0,
|
352 |
"vector_lens": [
|
353 |
-1,
|
354 |
+
4
|
355 |
]
|
356 |
},
|
357 |
"model.layers.10.mlp.down_proj": {
|
|
|
375 |
"outlier_size": 0,
|
376 |
"vector_lens": [
|
377 |
-1,
|
378 |
+
4
|
379 |
]
|
380 |
},
|
381 |
"model.layers.10.mlp.gate_proj": {
|
|
|
399 |
"outlier_size": 0,
|
400 |
"vector_lens": [
|
401 |
-1,
|
402 |
+
4
|
403 |
]
|
404 |
},
|
405 |
"model.layers.10.mlp.up_proj": {
|
|
|
423 |
"outlier_size": 0,
|
424 |
"vector_lens": [
|
425 |
-1,
|
426 |
+
4
|
427 |
]
|
428 |
},
|
429 |
"model.layers.10.self_attn.k_proj": {
|
|
|
447 |
"outlier_size": 0,
|
448 |
"vector_lens": [
|
449 |
-1,
|
450 |
+
4
|
451 |
]
|
452 |
},
|
453 |
"model.layers.10.self_attn.o_proj": {
|
|
|
471 |
"outlier_size": 0,
|
472 |
"vector_lens": [
|
473 |
-1,
|
474 |
+
4
|
475 |
]
|
476 |
},
|
477 |
"model.layers.10.self_attn.q_proj": {
|
|
|
495 |
"outlier_size": 0,
|
496 |
"vector_lens": [
|
497 |
-1,
|
498 |
+
4
|
499 |
]
|
500 |
},
|
501 |
"model.layers.10.self_attn.v_proj": {
|
|
|
519 |
"outlier_size": 0,
|
520 |
"vector_lens": [
|
521 |
-1,
|
522 |
+
4
|
523 |
]
|
524 |
},
|
525 |
"model.layers.11.mlp.down_proj": {
|
|
|
543 |
"outlier_size": 0,
|
544 |
"vector_lens": [
|
545 |
-1,
|
546 |
+
4
|
547 |
]
|
548 |
},
|
549 |
"model.layers.11.mlp.gate_proj": {
|
|
|
567 |
"outlier_size": 0,
|
568 |
"vector_lens": [
|
569 |
-1,
|
570 |
+
4
|
571 |
]
|
572 |
},
|
573 |
"model.layers.11.mlp.up_proj": {
|
|
|
591 |
"outlier_size": 0,
|
592 |
"vector_lens": [
|
593 |
-1,
|
594 |
+
4
|
595 |
]
|
596 |
},
|
597 |
"model.layers.11.self_attn.k_proj": {
|
|
|
615 |
"outlier_size": 0,
|
616 |
"vector_lens": [
|
617 |
-1,
|
618 |
+
4
|
619 |
]
|
620 |
},
|
621 |
"model.layers.11.self_attn.o_proj": {
|
|
|
639 |
"outlier_size": 0,
|
640 |
"vector_lens": [
|
641 |
-1,
|
642 |
+
4
|
643 |
]
|
644 |
},
|
645 |
"model.layers.11.self_attn.q_proj": {
|
|
|
663 |
"outlier_size": 0,
|
664 |
"vector_lens": [
|
665 |
-1,
|
666 |
+
4
|
667 |
]
|
668 |
},
|
669 |
"model.layers.11.self_attn.v_proj": {
|
|
|
687 |
"outlier_size": 0,
|
688 |
"vector_lens": [
|
689 |
-1,
|
690 |
+
4
|
691 |
]
|
692 |
},
|
693 |
"model.layers.12.mlp.down_proj": {
|
|
|
711 |
"outlier_size": 0,
|
712 |
"vector_lens": [
|
713 |
-1,
|
714 |
+
4
|
715 |
]
|
716 |
},
|
717 |
"model.layers.12.mlp.gate_proj": {
|
|
|
735 |
"outlier_size": 0,
|
736 |
"vector_lens": [
|
737 |
-1,
|
738 |
+
4
|
739 |
]
|
740 |
},
|
741 |
"model.layers.12.mlp.up_proj": {
|
|
|
759 |
"outlier_size": 0,
|
760 |
"vector_lens": [
|
761 |
-1,
|
762 |
+
4
|
763 |
]
|
764 |
},
|
765 |
"model.layers.12.self_attn.k_proj": {
|
|
|
783 |
"outlier_size": 0,
|
784 |
"vector_lens": [
|
785 |
-1,
|
786 |
+
4
|
787 |
]
|
788 |
},
|
789 |
"model.layers.12.self_attn.o_proj": {
|
|
|
807 |
"outlier_size": 0,
|
808 |
"vector_lens": [
|
809 |
-1,
|
810 |
+
4
|
811 |
]
|
812 |
},
|
813 |
"model.layers.12.self_attn.q_proj": {
|
|
|
831 |
"outlier_size": 0,
|
832 |
"vector_lens": [
|
833 |
-1,
|
834 |
+
4
|
835 |
]
|
836 |
},
|
837 |
"model.layers.12.self_attn.v_proj": {
|
|
|
855 |
"outlier_size": 0,
|
856 |
"vector_lens": [
|
857 |
-1,
|
858 |
+
4
|
859 |
]
|
860 |
},
|
861 |
"model.layers.13.mlp.down_proj": {
|
|
|
879 |
"outlier_size": 0,
|
880 |
"vector_lens": [
|
881 |
-1,
|
882 |
+
4
|
883 |
]
|
884 |
},
|
885 |
"model.layers.13.mlp.gate_proj": {
|
|
|
903 |
"outlier_size": 0,
|
904 |
"vector_lens": [
|
905 |
-1,
|
906 |
+
4
|
907 |
]
|
908 |
},
|
909 |
"model.layers.13.mlp.up_proj": {
|
|
|
927 |
"outlier_size": 0,
|
928 |
"vector_lens": [
|
929 |
-1,
|
930 |
+
4
|
931 |
]
|
932 |
},
|
933 |
"model.layers.13.self_attn.k_proj": {
|
|
|
951 |
"outlier_size": 0,
|
952 |
"vector_lens": [
|
953 |
-1,
|
954 |
+
4
|
955 |
]
|
956 |
},
|
957 |
"model.layers.13.self_attn.o_proj": {
|
|
|
975 |
"outlier_size": 0,
|
976 |
"vector_lens": [
|
977 |
-1,
|
978 |
+
4
|
979 |
]
|
980 |
},
|
981 |
"model.layers.13.self_attn.q_proj": {
|
|
|
999 |
"outlier_size": 0,
|
1000 |
"vector_lens": [
|
1001 |
-1,
|
1002 |
+
4
|
1003 |
]
|
1004 |
},
|
1005 |
"model.layers.13.self_attn.v_proj": {
|
|
|
1023 |
"outlier_size": 0,
|
1024 |
"vector_lens": [
|
1025 |
-1,
|
1026 |
+
4
|
1027 |
]
|
1028 |
},
|
1029 |
"model.layers.14.mlp.down_proj": {
|
|
|
1047 |
"outlier_size": 0,
|
1048 |
"vector_lens": [
|
1049 |
-1,
|
1050 |
+
4
|
1051 |
]
|
1052 |
},
|
1053 |
"model.layers.14.mlp.gate_proj": {
|
|
|
1071 |
"outlier_size": 0,
|
1072 |
"vector_lens": [
|
1073 |
-1,
|
1074 |
+
4
|
1075 |
]
|
1076 |
},
|
1077 |
"model.layers.14.mlp.up_proj": {
|
|
|
1095 |
"outlier_size": 0,
|
1096 |
"vector_lens": [
|
1097 |
-1,
|
1098 |
+
4
|
1099 |
]
|
1100 |
},
|
1101 |
"model.layers.14.self_attn.k_proj": {
|
|
|
1119 |
"outlier_size": 0,
|
1120 |
"vector_lens": [
|
1121 |
-1,
|
1122 |
+
4
|
1123 |
]
|
1124 |
},
|
1125 |
"model.layers.14.self_attn.o_proj": {
|
|
|
1143 |
"outlier_size": 0,
|
1144 |
"vector_lens": [
|
1145 |
-1,
|
1146 |
+
4
|
1147 |
]
|
1148 |
},
|
1149 |
"model.layers.14.self_attn.q_proj": {
|
|
|
1167 |
"outlier_size": 0,
|
1168 |
"vector_lens": [
|
1169 |
-1,
|
1170 |
+
4
|
1171 |
]
|
1172 |
},
|
1173 |
"model.layers.14.self_attn.v_proj": {
|
|
|
1191 |
"outlier_size": 0,
|
1192 |
"vector_lens": [
|
1193 |
-1,
|
1194 |
+
4
|
1195 |
]
|
1196 |
},
|
1197 |
"model.layers.15.mlp.down_proj": {
|
|
|
1215 |
"outlier_size": 0,
|
1216 |
"vector_lens": [
|
1217 |
-1,
|
1218 |
+
4
|
1219 |
]
|
1220 |
},
|
1221 |
"model.layers.15.mlp.gate_proj": {
|
|
|
1239 |
"outlier_size": 0,
|
1240 |
"vector_lens": [
|
1241 |
-1,
|
1242 |
+
4
|
1243 |
]
|
1244 |
},
|
1245 |
"model.layers.15.mlp.up_proj": {
|
|
|
1263 |
"outlier_size": 0,
|
1264 |
"vector_lens": [
|
1265 |
-1,
|
1266 |
+
4
|
1267 |
]
|
1268 |
},
|
1269 |
"model.layers.15.self_attn.k_proj": {
|
|
|
1287 |
"outlier_size": 0,
|
1288 |
"vector_lens": [
|
1289 |
-1,
|
1290 |
+
4
|
1291 |
]
|
1292 |
},
|
1293 |
"model.layers.15.self_attn.o_proj": {
|
|
|
1311 |
"outlier_size": 0,
|
1312 |
"vector_lens": [
|
1313 |
-1,
|
1314 |
+
4
|
1315 |
]
|
1316 |
},
|
1317 |
"model.layers.15.self_attn.q_proj": {
|
|
|
1335 |
"outlier_size": 0,
|
1336 |
"vector_lens": [
|
1337 |
-1,
|
1338 |
+
4
|
1339 |
]
|
1340 |
},
|
1341 |
"model.layers.15.self_attn.v_proj": {
|
|
|
1359 |
"outlier_size": 0,
|
1360 |
"vector_lens": [
|
1361 |
-1,
|
1362 |
+
4
|
1363 |
]
|
1364 |
},
|
1365 |
"model.layers.16.mlp.down_proj": {
|
|
|
1383 |
"outlier_size": 0,
|
1384 |
"vector_lens": [
|
1385 |
-1,
|
1386 |
+
4
|
1387 |
]
|
1388 |
},
|
1389 |
"model.layers.16.mlp.gate_proj": {
|
|
|
1407 |
"outlier_size": 0,
|
1408 |
"vector_lens": [
|
1409 |
-1,
|
1410 |
+
4
|
1411 |
]
|
1412 |
},
|
1413 |
"model.layers.16.mlp.up_proj": {
|
|
|
1431 |
"outlier_size": 0,
|
1432 |
"vector_lens": [
|
1433 |
-1,
|
1434 |
+
4
|
1435 |
]
|
1436 |
},
|
1437 |
"model.layers.16.self_attn.k_proj": {
|
|
|
1455 |
"outlier_size": 0,
|
1456 |
"vector_lens": [
|
1457 |
-1,
|
1458 |
+
4
|
1459 |
]
|
1460 |
},
|
1461 |
"model.layers.16.self_attn.o_proj": {
|
|
|
1479 |
"outlier_size": 0,
|
1480 |
"vector_lens": [
|
1481 |
-1,
|
1482 |
+
4
|
1483 |
]
|
1484 |
},
|
1485 |
"model.layers.16.self_attn.q_proj": {
|
|
|
1503 |
"outlier_size": 0,
|
1504 |
"vector_lens": [
|
1505 |
-1,
|
1506 |
+
4
|
1507 |
]
|
1508 |
},
|
1509 |
"model.layers.16.self_attn.v_proj": {
|
|
|
1527 |
"outlier_size": 0,
|
1528 |
"vector_lens": [
|
1529 |
-1,
|
1530 |
+
4
|
1531 |
]
|
1532 |
},
|
1533 |
"model.layers.17.mlp.down_proj": {
|
|
|
1551 |
"outlier_size": 0,
|
1552 |
"vector_lens": [
|
1553 |
-1,
|
1554 |
+
4
|
1555 |
]
|
1556 |
},
|
1557 |
"model.layers.17.mlp.gate_proj": {
|
|
|
1575 |
"outlier_size": 0,
|
1576 |
"vector_lens": [
|
1577 |
-1,
|
1578 |
+
4
|
1579 |
]
|
1580 |
},
|
1581 |
"model.layers.17.mlp.up_proj": {
|
|
|
1599 |
"outlier_size": 0,
|
1600 |
"vector_lens": [
|
1601 |
-1,
|
1602 |
+
4
|
1603 |
]
|
1604 |
},
|
1605 |
"model.layers.17.self_attn.k_proj": {
|
|
|
1623 |
"outlier_size": 0,
|
1624 |
"vector_lens": [
|
1625 |
-1,
|
1626 |
+
4
|
1627 |
]
|
1628 |
},
|
1629 |
"model.layers.17.self_attn.o_proj": {
|
|
|
1647 |
"outlier_size": 0,
|
1648 |
"vector_lens": [
|
1649 |
-1,
|
1650 |
+
4
|
1651 |
]
|
1652 |
},
|
1653 |
"model.layers.17.self_attn.q_proj": {
|
|
|
1671 |
"outlier_size": 0,
|
1672 |
"vector_lens": [
|
1673 |
-1,
|
1674 |
+
4
|
1675 |
]
|
1676 |
},
|
1677 |
"model.layers.17.self_attn.v_proj": {
|
|
|
1695 |
"outlier_size": 0,
|
1696 |
"vector_lens": [
|
1697 |
-1,
|
1698 |
+
4
|
1699 |
]
|
1700 |
},
|
1701 |
"model.layers.18.mlp.down_proj": {
|
|
|
1719 |
"outlier_size": 0,
|
1720 |
"vector_lens": [
|
1721 |
-1,
|
1722 |
+
4
|
1723 |
]
|
1724 |
},
|
1725 |
"model.layers.18.mlp.gate_proj": {
|
|
|
1743 |
"outlier_size": 0,
|
1744 |
"vector_lens": [
|
1745 |
-1,
|
1746 |
+
4
|
1747 |
]
|
1748 |
},
|
1749 |
"model.layers.18.mlp.up_proj": {
|
|
|
1767 |
"outlier_size": 0,
|
1768 |
"vector_lens": [
|
1769 |
-1,
|
1770 |
+
4
|
1771 |
]
|
1772 |
},
|
1773 |
"model.layers.18.self_attn.k_proj": {
|
|
|
1791 |
"outlier_size": 0,
|
1792 |
"vector_lens": [
|
1793 |
-1,
|
1794 |
+
4
|
1795 |
]
|
1796 |
},
|
1797 |
"model.layers.18.self_attn.o_proj": {
|
|
|
1815 |
"outlier_size": 0,
|
1816 |
"vector_lens": [
|
1817 |
-1,
|
1818 |
+
4
|
1819 |
]
|
1820 |
},
|
1821 |
"model.layers.18.self_attn.q_proj": {
|
|
|
1839 |
"outlier_size": 0,
|
1840 |
"vector_lens": [
|
1841 |
-1,
|
1842 |
+
4
|
1843 |
]
|
1844 |
},
|
1845 |
"model.layers.18.self_attn.v_proj": {
|
|
|
1863 |
"outlier_size": 0,
|
1864 |
"vector_lens": [
|
1865 |
-1,
|
1866 |
+
4
|
1867 |
]
|
1868 |
},
|
1869 |
"model.layers.19.mlp.down_proj": {
|
|
|
1887 |
"outlier_size": 0,
|
1888 |
"vector_lens": [
|
1889 |
-1,
|
1890 |
+
4
|
1891 |
]
|
1892 |
},
|
1893 |
"model.layers.19.mlp.gate_proj": {
|
|
|
1911 |
"outlier_size": 0,
|
1912 |
"vector_lens": [
|
1913 |
-1,
|
1914 |
+
4
|
1915 |
]
|
1916 |
},
|
1917 |
"model.layers.19.mlp.up_proj": {
|
|
|
1935 |
"outlier_size": 0,
|
1936 |
"vector_lens": [
|
1937 |
-1,
|
1938 |
+
4
|
1939 |
]
|
1940 |
},
|
1941 |
"model.layers.19.self_attn.k_proj": {
|
|
|
1959 |
"outlier_size": 0,
|
1960 |
"vector_lens": [
|
1961 |
-1,
|
1962 |
+
4
|
1963 |
]
|
1964 |
},
|
1965 |
"model.layers.19.self_attn.o_proj": {
|
|
|
1983 |
"outlier_size": 0,
|
1984 |
"vector_lens": [
|
1985 |
-1,
|
1986 |
+
4
|
1987 |
]
|
1988 |
},
|
1989 |
"model.layers.19.self_attn.q_proj": {
|
|
|
2007 |
"outlier_size": 0,
|
2008 |
"vector_lens": [
|
2009 |
-1,
|
2010 |
+
4
|
2011 |
]
|
2012 |
},
|
2013 |
"model.layers.19.self_attn.v_proj": {
|
|
|
2031 |
"outlier_size": 0,
|
2032 |
"vector_lens": [
|
2033 |
-1,
|
2034 |
+
4
|
2035 |
]
|
2036 |
},
|
2037 |
"model.layers.2.mlp.down_proj": {
|
|
|
2055 |
"outlier_size": 0,
|
2056 |
"vector_lens": [
|
2057 |
-1,
|
2058 |
+
4
|
2059 |
]
|
2060 |
},
|
2061 |
"model.layers.2.mlp.gate_proj": {
|
|
|
2079 |
"outlier_size": 0,
|
2080 |
"vector_lens": [
|
2081 |
-1,
|
2082 |
+
4
|
2083 |
]
|
2084 |
},
|
2085 |
"model.layers.2.mlp.up_proj": {
|
|
|
2103 |
"outlier_size": 0,
|
2104 |
"vector_lens": [
|
2105 |
-1,
|
2106 |
+
4
|
2107 |
]
|
2108 |
},
|
2109 |
"model.layers.2.self_attn.k_proj": {
|
|
|
2127 |
"outlier_size": 0,
|
2128 |
"vector_lens": [
|
2129 |
-1,
|
2130 |
+
4
|
2131 |
]
|
2132 |
},
|
2133 |
"model.layers.2.self_attn.o_proj": {
|
|
|
2151 |
"outlier_size": 0,
|
2152 |
"vector_lens": [
|
2153 |
-1,
|
2154 |
+
4
|
2155 |
]
|
2156 |
},
|
2157 |
"model.layers.2.self_attn.q_proj": {
|
|
|
2175 |
"outlier_size": 0,
|
2176 |
"vector_lens": [
|
2177 |
-1,
|
2178 |
+
4
|
2179 |
]
|
2180 |
},
|
2181 |
"model.layers.2.self_attn.v_proj": {
|
|
|
2199 |
"outlier_size": 0,
|
2200 |
"vector_lens": [
|
2201 |
-1,
|
2202 |
+
4
|
2203 |
]
|
2204 |
},
|
2205 |
"model.layers.20.mlp.down_proj": {
|
|
|
2223 |
"outlier_size": 0,
|
2224 |
"vector_lens": [
|
2225 |
-1,
|
2226 |
+
4
|
2227 |
]
|
2228 |
},
|
2229 |
"model.layers.20.mlp.gate_proj": {
|
|
|
2247 |
"outlier_size": 0,
|
2248 |
"vector_lens": [
|
2249 |
-1,
|
2250 |
+
4
|
2251 |
]
|
2252 |
},
|
2253 |
"model.layers.20.mlp.up_proj": {
|
|
|
2271 |
"outlier_size": 0,
|
2272 |
"vector_lens": [
|
2273 |
-1,
|
2274 |
+
4
|
2275 |
]
|
2276 |
},
|
2277 |
"model.layers.20.self_attn.k_proj": {
|
|
|
2295 |
"outlier_size": 0,
|
2296 |
"vector_lens": [
|
2297 |
-1,
|
2298 |
+
4
|
2299 |
]
|
2300 |
},
|
2301 |
"model.layers.20.self_attn.o_proj": {
|
|
|
2319 |
"outlier_size": 0,
|
2320 |
"vector_lens": [
|
2321 |
-1,
|
2322 |
+
4
|
2323 |
]
|
2324 |
},
|
2325 |
"model.layers.20.self_attn.q_proj": {
|
|
|
2343 |
"outlier_size": 0,
|
2344 |
"vector_lens": [
|
2345 |
-1,
|
2346 |
+
4
|
2347 |
]
|
2348 |
},
|
2349 |
"model.layers.20.self_attn.v_proj": {
|
|
|
2367 |
"outlier_size": 0,
|
2368 |
"vector_lens": [
|
2369 |
-1,
|
2370 |
+
4
|
2371 |
]
|
2372 |
},
|
2373 |
"model.layers.21.mlp.down_proj": {
|
|
|
2391 |
"outlier_size": 0,
|
2392 |
"vector_lens": [
|
2393 |
-1,
|
2394 |
+
4
|
2395 |
]
|
2396 |
},
|
2397 |
"model.layers.21.mlp.gate_proj": {
|
|
|
2415 |
"outlier_size": 0,
|
2416 |
"vector_lens": [
|
2417 |
-1,
|
2418 |
+
4
|
2419 |
]
|
2420 |
},
|
2421 |
"model.layers.21.mlp.up_proj": {
|
|
|
2439 |
"outlier_size": 0,
|
2440 |
"vector_lens": [
|
2441 |
-1,
|
2442 |
+
4
|
2443 |
]
|
2444 |
},
|
2445 |
"model.layers.21.self_attn.k_proj": {
|
|
|
2463 |
"outlier_size": 0,
|
2464 |
"vector_lens": [
|
2465 |
-1,
|
2466 |
+
4
|
2467 |
]
|
2468 |
},
|
2469 |
"model.layers.21.self_attn.o_proj": {
|
|
|
2487 |
"outlier_size": 0,
|
2488 |
"vector_lens": [
|
2489 |
-1,
|
2490 |
+
4
|
2491 |
]
|
2492 |
},
|
2493 |
"model.layers.21.self_attn.q_proj": {
|
|
|
2511 |
"outlier_size": 0,
|
2512 |
"vector_lens": [
|
2513 |
-1,
|
2514 |
+
4
|
2515 |
]
|
2516 |
},
|
2517 |
"model.layers.21.self_attn.v_proj": {
|
|
|
2535 |
"outlier_size": 0,
|
2536 |
"vector_lens": [
|
2537 |
-1,
|
2538 |
+
4
|
2539 |
]
|
2540 |
},
|
2541 |
"model.layers.22.mlp.down_proj": {
|
|
|
2559 |
"outlier_size": 0,
|
2560 |
"vector_lens": [
|
2561 |
-1,
|
2562 |
+
4
|
2563 |
]
|
2564 |
},
|
2565 |
"model.layers.22.mlp.gate_proj": {
|
|
|
2583 |
"outlier_size": 0,
|
2584 |
"vector_lens": [
|
2585 |
-1,
|
2586 |
+
4
|
2587 |
]
|
2588 |
},
|
2589 |
"model.layers.22.mlp.up_proj": {
|
|
|
2607 |
"outlier_size": 0,
|
2608 |
"vector_lens": [
|
2609 |
-1,
|
2610 |
+
4
|
2611 |
]
|
2612 |
},
|
2613 |
"model.layers.22.self_attn.k_proj": {
|
|
|
2631 |
"outlier_size": 0,
|
2632 |
"vector_lens": [
|
2633 |
-1,
|
2634 |
+
4
|
2635 |
]
|
2636 |
},
|
2637 |
"model.layers.22.self_attn.o_proj": {
|
|
|
2655 |
"outlier_size": 0,
|
2656 |
"vector_lens": [
|
2657 |
-1,
|
2658 |
+
4
|
2659 |
]
|
2660 |
},
|
2661 |
"model.layers.22.self_attn.q_proj": {
|
|
|
2679 |
"outlier_size": 0,
|
2680 |
"vector_lens": [
|
2681 |
-1,
|
2682 |
+
4
|
2683 |
]
|
2684 |
},
|
2685 |
"model.layers.22.self_attn.v_proj": {
|
|
|
2703 |
"outlier_size": 0,
|
2704 |
"vector_lens": [
|
2705 |
-1,
|
2706 |
+
4
|
2707 |
]
|
2708 |
},
|
2709 |
"model.layers.23.mlp.down_proj": {
|
|
|
2727 |
"outlier_size": 0,
|
2728 |
"vector_lens": [
|
2729 |
-1,
|
2730 |
+
4
|
2731 |
]
|
2732 |
},
|
2733 |
"model.layers.23.mlp.gate_proj": {
|
|
|
2751 |
"outlier_size": 0,
|
2752 |
"vector_lens": [
|
2753 |
-1,
|
2754 |
+
4
|
2755 |
]
|
2756 |
},
|
2757 |
"model.layers.23.mlp.up_proj": {
|
|
|
2775 |
"outlier_size": 0,
|
2776 |
"vector_lens": [
|
2777 |
-1,
|
2778 |
+
4
|
2779 |
]
|
2780 |
},
|
2781 |
"model.layers.23.self_attn.k_proj": {
|
|
|
2799 |
"outlier_size": 0,
|
2800 |
"vector_lens": [
|
2801 |
-1,
|
2802 |
+
4
|
2803 |
]
|
2804 |
},
|
2805 |
"model.layers.23.self_attn.o_proj": {
|
|
|
2823 |
"outlier_size": 0,
|
2824 |
"vector_lens": [
|
2825 |
-1,
|
2826 |
+
4
|
2827 |
]
|
2828 |
},
|
2829 |
"model.layers.23.self_attn.q_proj": {
|
|
|
2847 |
"outlier_size": 0,
|
2848 |
"vector_lens": [
|
2849 |
-1,
|
2850 |
+
4
|
2851 |
]
|
2852 |
},
|
2853 |
"model.layers.23.self_attn.v_proj": {
|
|
|
2871 |
"outlier_size": 0,
|
2872 |
"vector_lens": [
|
2873 |
-1,
|
2874 |
+
4
|
2875 |
]
|
2876 |
},
|
2877 |
"model.layers.24.mlp.down_proj": {
|
|
|
2895 |
"outlier_size": 0,
|
2896 |
"vector_lens": [
|
2897 |
-1,
|
2898 |
+
4
|
2899 |
]
|
2900 |
},
|
2901 |
"model.layers.24.mlp.gate_proj": {
|
|
|
2919 |
"outlier_size": 0,
|
2920 |
"vector_lens": [
|
2921 |
-1,
|
2922 |
+
4
|
2923 |
]
|
2924 |
},
|
2925 |
"model.layers.24.mlp.up_proj": {
|
|
|
2943 |
"outlier_size": 0,
|
2944 |
"vector_lens": [
|
2945 |
-1,
|
2946 |
+
4
|
2947 |
]
|
2948 |
},
|
2949 |
"model.layers.24.self_attn.k_proj": {
|
|
|
2967 |
"outlier_size": 0,
|
2968 |
"vector_lens": [
|
2969 |
-1,
|
2970 |
+
4
|
2971 |
]
|
2972 |
},
|
2973 |
"model.layers.24.self_attn.o_proj": {
|
|
|
2991 |
"outlier_size": 0,
|
2992 |
"vector_lens": [
|
2993 |
-1,
|
2994 |
+
4
|
2995 |
]
|
2996 |
},
|
2997 |
"model.layers.24.self_attn.q_proj": {
|
|
|
3015 |
"outlier_size": 0,
|
3016 |
"vector_lens": [
|
3017 |
-1,
|
3018 |
+
4
|
3019 |
]
|
3020 |
},
|
3021 |
"model.layers.24.self_attn.v_proj": {
|
|
|
3039 |
"outlier_size": 0,
|
3040 |
"vector_lens": [
|
3041 |
-1,
|
3042 |
+
4
|
3043 |
]
|
3044 |
},
|
3045 |
"model.layers.25.mlp.down_proj": {
|
|
|
3063 |
"outlier_size": 0,
|
3064 |
"vector_lens": [
|
3065 |
-1,
|
3066 |
+
4
|
3067 |
]
|
3068 |
},
|
3069 |
"model.layers.25.mlp.gate_proj": {
|
|
|
3087 |
"outlier_size": 0,
|
3088 |
"vector_lens": [
|
3089 |
-1,
|
3090 |
+
4
|
3091 |
]
|
3092 |
},
|
3093 |
"model.layers.25.mlp.up_proj": {
|
|
|
3111 |
"outlier_size": 0,
|
3112 |
"vector_lens": [
|
3113 |
-1,
|
3114 |
+
4
|
3115 |
]
|
3116 |
},
|
3117 |
"model.layers.25.self_attn.k_proj": {
|
|
|
3135 |
"outlier_size": 0,
|
3136 |
"vector_lens": [
|
3137 |
-1,
|
3138 |
+
4
|
3139 |
]
|
3140 |
},
|
3141 |
"model.layers.25.self_attn.o_proj": {
|
|
|
3159 |
"outlier_size": 0,
|
3160 |
"vector_lens": [
|
3161 |
-1,
|
3162 |
+
4
|
3163 |
]
|
3164 |
},
|
3165 |
"model.layers.25.self_attn.q_proj": {
|
|
|
3183 |
"outlier_size": 0,
|
3184 |
"vector_lens": [
|
3185 |
-1,
|
3186 |
+
4
|
3187 |
]
|
3188 |
},
|
3189 |
"model.layers.25.self_attn.v_proj": {
|
|
|
3207 |
"outlier_size": 0,
|
3208 |
"vector_lens": [
|
3209 |
-1,
|
3210 |
+
4
|
3211 |
]
|
3212 |
},
|
3213 |
"model.layers.26.mlp.down_proj": {
|
|
|
3231 |
"outlier_size": 0,
|
3232 |
"vector_lens": [
|
3233 |
-1,
|
3234 |
+
4
|
3235 |
]
|
3236 |
},
|
3237 |
"model.layers.26.mlp.gate_proj": {
|
|
|
3255 |
"outlier_size": 0,
|
3256 |
"vector_lens": [
|
3257 |
-1,
|
3258 |
+
4
|
3259 |
]
|
3260 |
},
|
3261 |
"model.layers.26.mlp.up_proj": {
|
|
|
3279 |
"outlier_size": 0,
|
3280 |
"vector_lens": [
|
3281 |
-1,
|
3282 |
+
4
|
3283 |
]
|
3284 |
},
|
3285 |
"model.layers.26.self_attn.k_proj": {
|
|
|
3303 |
"outlier_size": 0,
|
3304 |
"vector_lens": [
|
3305 |
-1,
|
3306 |
+
4
|
3307 |
]
|
3308 |
},
|
3309 |
"model.layers.26.self_attn.o_proj": {
|
|
|
3327 |
"outlier_size": 0,
|
3328 |
"vector_lens": [
|
3329 |
-1,
|
3330 |
+
4
|
3331 |
]
|
3332 |
},
|
3333 |
"model.layers.26.self_attn.q_proj": {
|
|
|
3351 |
"outlier_size": 0,
|
3352 |
"vector_lens": [
|
3353 |
-1,
|
3354 |
+
4
|
3355 |
]
|
3356 |
},
|
3357 |
"model.layers.26.self_attn.v_proj": {
|
|
|
3375 |
"outlier_size": 0,
|
3376 |
"vector_lens": [
|
3377 |
-1,
|
3378 |
+
4
|
3379 |
]
|
3380 |
},
|
3381 |
"model.layers.27.mlp.down_proj": {
|
|
|
3399 |
"outlier_size": 0,
|
3400 |
"vector_lens": [
|
3401 |
-1,
|
3402 |
+
4
|
3403 |
]
|
3404 |
},
|
3405 |
"model.layers.27.mlp.gate_proj": {
|
|
|
3423 |
"outlier_size": 0,
|
3424 |
"vector_lens": [
|
3425 |
-1,
|
3426 |
+
4
|
3427 |
]
|
3428 |
},
|
3429 |
"model.layers.27.mlp.up_proj": {
|
|
|
3447 |
"outlier_size": 0,
|
3448 |
"vector_lens": [
|
3449 |
-1,
|
3450 |
+
4
|
3451 |
]
|
3452 |
},
|
3453 |
"model.layers.27.self_attn.k_proj": {
|
|
|
3471 |
"outlier_size": 0,
|
3472 |
"vector_lens": [
|
3473 |
-1,
|
3474 |
+
4
|
3475 |
]
|
3476 |
},
|
3477 |
"model.layers.27.self_attn.o_proj": {
|
|
|
3495 |
"outlier_size": 0,
|
3496 |
"vector_lens": [
|
3497 |
-1,
|
3498 |
+
4
|
3499 |
]
|
3500 |
},
|
3501 |
"model.layers.27.self_attn.q_proj": {
|
|
|
3519 |
"outlier_size": 0,
|
3520 |
"vector_lens": [
|
3521 |
-1,
|
3522 |
+
4
|
3523 |
]
|
3524 |
},
|
3525 |
"model.layers.27.self_attn.v_proj": {
|
|
|
3543 |
"outlier_size": 0,
|
3544 |
"vector_lens": [
|
3545 |
-1,
|
3546 |
+
4
|
3547 |
]
|
3548 |
},
|
3549 |
"model.layers.28.mlp.down_proj": {
|
|
|
3567 |
"outlier_size": 0,
|
3568 |
"vector_lens": [
|
3569 |
-1,
|
3570 |
+
4
|
3571 |
]
|
3572 |
},
|
3573 |
"model.layers.28.mlp.gate_proj": {
|
|
|
3591 |
"outlier_size": 0,
|
3592 |
"vector_lens": [
|
3593 |
-1,
|
3594 |
+
4
|
3595 |
]
|
3596 |
},
|
3597 |
"model.layers.28.mlp.up_proj": {
|
|
|
3615 |
"outlier_size": 0,
|
3616 |
"vector_lens": [
|
3617 |
-1,
|
3618 |
+
4
|
3619 |
]
|
3620 |
},
|
3621 |
"model.layers.28.self_attn.k_proj": {
|
|
|
3639 |
"outlier_size": 0,
|
3640 |
"vector_lens": [
|
3641 |
-1,
|
3642 |
+
4
|
3643 |
]
|
3644 |
},
|
3645 |
"model.layers.28.self_attn.o_proj": {
|
|
|
3663 |
"outlier_size": 0,
|
3664 |
"vector_lens": [
|
3665 |
-1,
|
3666 |
+
4
|
3667 |
]
|
3668 |
},
|
3669 |
"model.layers.28.self_attn.q_proj": {
|
|
|
3687 |
"outlier_size": 0,
|
3688 |
"vector_lens": [
|
3689 |
-1,
|
3690 |
+
4
|
3691 |
]
|
3692 |
},
|
3693 |
"model.layers.28.self_attn.v_proj": {
|
|
|
3711 |
"outlier_size": 0,
|
3712 |
"vector_lens": [
|
3713 |
-1,
|
3714 |
+
4
|
3715 |
]
|
3716 |
},
|
3717 |
"model.layers.29.mlp.down_proj": {
|
|
|
3735 |
"outlier_size": 0,
|
3736 |
"vector_lens": [
|
3737 |
-1,
|
3738 |
+
4
|
3739 |
]
|
3740 |
},
|
3741 |
"model.layers.29.mlp.gate_proj": {
|
|
|
3759 |
"outlier_size": 0,
|
3760 |
"vector_lens": [
|
3761 |
-1,
|
3762 |
+
4
|
3763 |
]
|
3764 |
},
|
3765 |
"model.layers.29.mlp.up_proj": {
|
|
|
3783 |
"outlier_size": 0,
|
3784 |
"vector_lens": [
|
3785 |
-1,
|
3786 |
+
4
|
3787 |
]
|
3788 |
},
|
3789 |
"model.layers.29.self_attn.k_proj": {
|
|
|
3807 |
"outlier_size": 0,
|
3808 |
"vector_lens": [
|
3809 |
-1,
|
3810 |
+
4
|
3811 |
]
|
3812 |
},
|
3813 |
"model.layers.29.self_attn.o_proj": {
|
|
|
3831 |
"outlier_size": 0,
|
3832 |
"vector_lens": [
|
3833 |
-1,
|
3834 |
+
4
|
3835 |
]
|
3836 |
},
|
3837 |
"model.layers.29.self_attn.q_proj": {
|
|
|
3855 |
"outlier_size": 0,
|
3856 |
"vector_lens": [
|
3857 |
-1,
|
3858 |
+
4
|
3859 |
]
|
3860 |
},
|
3861 |
"model.layers.29.self_attn.v_proj": {
|
|
|
3879 |
"outlier_size": 0,
|
3880 |
"vector_lens": [
|
3881 |
-1,
|
3882 |
+
4
|
3883 |
]
|
3884 |
},
|
3885 |
"model.layers.3.mlp.down_proj": {
|
|
|
3903 |
"outlier_size": 0,
|
3904 |
"vector_lens": [
|
3905 |
-1,
|
3906 |
+
4
|
3907 |
]
|
3908 |
},
|
3909 |
"model.layers.3.mlp.gate_proj": {
|
|
|
3927 |
"outlier_size": 0,
|
3928 |
"vector_lens": [
|
3929 |
-1,
|
3930 |
+
4
|
3931 |
]
|
3932 |
},
|
3933 |
"model.layers.3.mlp.up_proj": {
|
|
|
3951 |
"outlier_size": 0,
|
3952 |
"vector_lens": [
|
3953 |
-1,
|
3954 |
+
4
|
3955 |
]
|
3956 |
},
|
3957 |
"model.layers.3.self_attn.k_proj": {
|
|
|
3975 |
"outlier_size": 0,
|
3976 |
"vector_lens": [
|
3977 |
-1,
|
3978 |
+
4
|
3979 |
]
|
3980 |
},
|
3981 |
"model.layers.3.self_attn.o_proj": {
|
|
|
3999 |
"outlier_size": 0,
|
4000 |
"vector_lens": [
|
4001 |
-1,
|
4002 |
+
4
|
4003 |
]
|
4004 |
},
|
4005 |
"model.layers.3.self_attn.q_proj": {
|
|
|
4023 |
"outlier_size": 0,
|
4024 |
"vector_lens": [
|
4025 |
-1,
|
4026 |
+
4
|
4027 |
]
|
4028 |
},
|
4029 |
"model.layers.3.self_attn.v_proj": {
|
|
|
4047 |
"outlier_size": 0,
|
4048 |
"vector_lens": [
|
4049 |
-1,
|
4050 |
+
4
|
4051 |
]
|
4052 |
},
|
4053 |
"model.layers.30.mlp.down_proj": {
|
|
|
4071 |
"outlier_size": 0,
|
4072 |
"vector_lens": [
|
4073 |
-1,
|
4074 |
+
4
|
4075 |
]
|
4076 |
},
|
4077 |
"model.layers.30.mlp.gate_proj": {
|
|
|
4095 |
"outlier_size": 0,
|
4096 |
"vector_lens": [
|
4097 |
-1,
|
4098 |
+
4
|
4099 |
]
|
4100 |
},
|
4101 |
"model.layers.30.mlp.up_proj": {
|
|
|
4119 |
"outlier_size": 0,
|
4120 |
"vector_lens": [
|
4121 |
-1,
|
4122 |
+
4
|
4123 |
]
|
4124 |
},
|
4125 |
"model.layers.30.self_attn.k_proj": {
|
|
|
4143 |
"outlier_size": 0,
|
4144 |
"vector_lens": [
|
4145 |
-1,
|
4146 |
+
4
|
4147 |
]
|
4148 |
},
|
4149 |
"model.layers.30.self_attn.o_proj": {
|
|
|
4167 |
"outlier_size": 0,
|
4168 |
"vector_lens": [
|
4169 |
-1,
|
4170 |
+
4
|
4171 |
]
|
4172 |
},
|
4173 |
"model.layers.30.self_attn.q_proj": {
|
|
|
4191 |
"outlier_size": 0,
|
4192 |
"vector_lens": [
|
4193 |
-1,
|
4194 |
+
4
|
4195 |
]
|
4196 |
},
|
4197 |
"model.layers.30.self_attn.v_proj": {
|
|
|
4215 |
"outlier_size": 0,
|
4216 |
"vector_lens": [
|
4217 |
-1,
|
4218 |
+
4
|
4219 |
]
|
4220 |
},
|
4221 |
"model.layers.31.mlp.down_proj": {
|
|
|
4239 |
"outlier_size": 0,
|
4240 |
"vector_lens": [
|
4241 |
-1,
|
4242 |
+
4
|
4243 |
]
|
4244 |
},
|
4245 |
"model.layers.31.mlp.gate_proj": {
|
|
|
4263 |
"outlier_size": 0,
|
4264 |
"vector_lens": [
|
4265 |
-1,
|
4266 |
+
4
|
4267 |
]
|
4268 |
},
|
4269 |
"model.layers.31.mlp.up_proj": {
|
|
|
4287 |
"outlier_size": 0,
|
4288 |
"vector_lens": [
|
4289 |
-1,
|
4290 |
+
4
|
4291 |
]
|
4292 |
},
|
4293 |
"model.layers.31.self_attn.k_proj": {
|
|
|
4311 |
"outlier_size": 0,
|
4312 |
"vector_lens": [
|
4313 |
-1,
|
4314 |
+
4
|
4315 |
]
|
4316 |
},
|
4317 |
"model.layers.31.self_attn.o_proj": {
|
|
|
4335 |
"outlier_size": 0,
|
4336 |
"vector_lens": [
|
4337 |
-1,
|
4338 |
+
4
|
4339 |
]
|
4340 |
},
|
4341 |
"model.layers.31.self_attn.q_proj": {
|
|
|
4359 |
"outlier_size": 0,
|
4360 |
"vector_lens": [
|
4361 |
-1,
|
4362 |
+
4
|
4363 |
]
|
4364 |
},
|
4365 |
"model.layers.31.self_attn.v_proj": {
|
|
|
4383 |
"outlier_size": 0,
|
4384 |
"vector_lens": [
|
4385 |
-1,
|
4386 |
+
4
|
4387 |
]
|
4388 |
},
|
4389 |
"model.layers.4.mlp.down_proj": {
|
|
|
4407 |
"outlier_size": 0,
|
4408 |
"vector_lens": [
|
4409 |
-1,
|
4410 |
+
4
|
4411 |
]
|
4412 |
},
|
4413 |
"model.layers.4.mlp.gate_proj": {
|
|
|
4431 |
"outlier_size": 0,
|
4432 |
"vector_lens": [
|
4433 |
-1,
|
4434 |
+
4
|
4435 |
]
|
4436 |
},
|
4437 |
"model.layers.4.mlp.up_proj": {
|
|
|
4455 |
"outlier_size": 0,
|
4456 |
"vector_lens": [
|
4457 |
-1,
|
4458 |
+
4
|
4459 |
]
|
4460 |
},
|
4461 |
"model.layers.4.self_attn.k_proj": {
|
|
|
4479 |
"outlier_size": 0,
|
4480 |
"vector_lens": [
|
4481 |
-1,
|
4482 |
+
4
|
4483 |
]
|
4484 |
},
|
4485 |
"model.layers.4.self_attn.o_proj": {
|
|
|
4503 |
"outlier_size": 0,
|
4504 |
"vector_lens": [
|
4505 |
-1,
|
4506 |
+
4
|
4507 |
]
|
4508 |
},
|
4509 |
"model.layers.4.self_attn.q_proj": {
|
|
|
4527 |
"outlier_size": 0,
|
4528 |
"vector_lens": [
|
4529 |
-1,
|
4530 |
+
4
|
4531 |
]
|
4532 |
},
|
4533 |
"model.layers.4.self_attn.v_proj": {
|
|
|
4551 |
"outlier_size": 0,
|
4552 |
"vector_lens": [
|
4553 |
-1,
|
4554 |
+
4
|
4555 |
]
|
4556 |
},
|
4557 |
"model.layers.5.mlp.down_proj": {
|
|
|
4575 |
"outlier_size": 0,
|
4576 |
"vector_lens": [
|
4577 |
-1,
|
4578 |
+
4
|
4579 |
]
|
4580 |
},
|
4581 |
"model.layers.5.mlp.gate_proj": {
|
|
|
4599 |
"outlier_size": 0,
|
4600 |
"vector_lens": [
|
4601 |
-1,
|
4602 |
+
4
|
4603 |
]
|
4604 |
},
|
4605 |
"model.layers.5.mlp.up_proj": {
|
|
|
4623 |
"outlier_size": 0,
|
4624 |
"vector_lens": [
|
4625 |
-1,
|
4626 |
+
4
|
4627 |
]
|
4628 |
},
|
4629 |
"model.layers.5.self_attn.k_proj": {
|
|
|
4647 |
"outlier_size": 0,
|
4648 |
"vector_lens": [
|
4649 |
-1,
|
4650 |
+
4
|
4651 |
]
|
4652 |
},
|
4653 |
"model.layers.5.self_attn.o_proj": {
|
|
|
4671 |
"outlier_size": 0,
|
4672 |
"vector_lens": [
|
4673 |
-1,
|
4674 |
+
4
|
4675 |
]
|
4676 |
},
|
4677 |
"model.layers.5.self_attn.q_proj": {
|
|
|
4695 |
"outlier_size": 0,
|
4696 |
"vector_lens": [
|
4697 |
-1,
|
4698 |
+
4
|
4699 |
]
|
4700 |
},
|
4701 |
"model.layers.5.self_attn.v_proj": {
|
|
|
4719 |
"outlier_size": 0,
|
4720 |
"vector_lens": [
|
4721 |
-1,
|
4722 |
+
4
|
4723 |
]
|
4724 |
},
|
4725 |
"model.layers.6.mlp.down_proj": {
|
|
|
4743 |
"outlier_size": 0,
|
4744 |
"vector_lens": [
|
4745 |
-1,
|
4746 |
+
4
|
4747 |
]
|
4748 |
},
|
4749 |
"model.layers.6.mlp.gate_proj": {
|
|
|
4767 |
"outlier_size": 0,
|
4768 |
"vector_lens": [
|
4769 |
-1,
|
4770 |
+
4
|
4771 |
]
|
4772 |
},
|
4773 |
"model.layers.6.mlp.up_proj": {
|
|
|
4791 |
"outlier_size": 0,
|
4792 |
"vector_lens": [
|
4793 |
-1,
|
4794 |
+
4
|
4795 |
]
|
4796 |
},
|
4797 |
"model.layers.6.self_attn.k_proj": {
|
|
|
4815 |
"outlier_size": 0,
|
4816 |
"vector_lens": [
|
4817 |
-1,
|
4818 |
+
4
|
4819 |
]
|
4820 |
},
|
4821 |
"model.layers.6.self_attn.o_proj": {
|
|
|
4839 |
"outlier_size": 0,
|
4840 |
"vector_lens": [
|
4841 |
-1,
|
4842 |
+
4
|
4843 |
]
|
4844 |
},
|
4845 |
"model.layers.6.self_attn.q_proj": {
|
|
|
4863 |
"outlier_size": 0,
|
4864 |
"vector_lens": [
|
4865 |
-1,
|
4866 |
+
4
|
4867 |
]
|
4868 |
},
|
4869 |
"model.layers.6.self_attn.v_proj": {
|
|
|
4887 |
"outlier_size": 0,
|
4888 |
"vector_lens": [
|
4889 |
-1,
|
4890 |
+
4
|
4891 |
]
|
4892 |
},
|
4893 |
"model.layers.7.mlp.down_proj": {
|
|
|
4911 |
"outlier_size": 0,
|
4912 |
"vector_lens": [
|
4913 |
-1,
|
4914 |
+
4
|
4915 |
]
|
4916 |
},
|
4917 |
"model.layers.7.mlp.gate_proj": {
|
|
|
4935 |
"outlier_size": 0,
|
4936 |
"vector_lens": [
|
4937 |
-1,
|
4938 |
+
4
|
4939 |
]
|
4940 |
},
|
4941 |
"model.layers.7.mlp.up_proj": {
|
|
|
4959 |
"outlier_size": 0,
|
4960 |
"vector_lens": [
|
4961 |
-1,
|
4962 |
+
4
|
4963 |
]
|
4964 |
},
|
4965 |
"model.layers.7.self_attn.k_proj": {
|
|
|
4983 |
"outlier_size": 0,
|
4984 |
"vector_lens": [
|
4985 |
-1,
|
4986 |
+
4
|
4987 |
]
|
4988 |
},
|
4989 |
"model.layers.7.self_attn.o_proj": {
|
|
|
5007 |
"outlier_size": 0,
|
5008 |
"vector_lens": [
|
5009 |
-1,
|
5010 |
+
4
|
5011 |
]
|
5012 |
},
|
5013 |
"model.layers.7.self_attn.q_proj": {
|
|
|
5031 |
"outlier_size": 0,
|
5032 |
"vector_lens": [
|
5033 |
-1,
|
5034 |
+
4
|
5035 |
]
|
5036 |
},
|
5037 |
"model.layers.7.self_attn.v_proj": {
|
|
|
5055 |
"outlier_size": 0,
|
5056 |
"vector_lens": [
|
5057 |
-1,
|
5058 |
+
4
|
5059 |
]
|
5060 |
},
|
5061 |
"model.layers.8.mlp.down_proj": {
|
|
|
5079 |
"outlier_size": 0,
|
5080 |
"vector_lens": [
|
5081 |
-1,
|
5082 |
+
4
|
5083 |
]
|
5084 |
},
|
5085 |
"model.layers.8.mlp.gate_proj": {
|
|
|
5103 |
"outlier_size": 0,
|
5104 |
"vector_lens": [
|
5105 |
-1,
|
5106 |
+
4
|
5107 |
]
|
5108 |
},
|
5109 |
"model.layers.8.mlp.up_proj": {
|
|
|
5127 |
"outlier_size": 0,
|
5128 |
"vector_lens": [
|
5129 |
-1,
|
5130 |
+
4
|
5131 |
]
|
5132 |
},
|
5133 |
"model.layers.8.self_attn.k_proj": {
|
|
|
5151 |
"outlier_size": 0,
|
5152 |
"vector_lens": [
|
5153 |
-1,
|
5154 |
+
4
|
5155 |
]
|
5156 |
},
|
5157 |
"model.layers.8.self_attn.o_proj": {
|
|
|
5175 |
"outlier_size": 0,
|
5176 |
"vector_lens": [
|
5177 |
-1,
|
5178 |
+
4
|
5179 |
]
|
5180 |
},
|
5181 |
"model.layers.8.self_attn.q_proj": {
|
|
|
5199 |
"outlier_size": 0,
|
5200 |
"vector_lens": [
|
5201 |
-1,
|
5202 |
+
4
|
5203 |
]
|
5204 |
},
|
5205 |
"model.layers.8.self_attn.v_proj": {
|
|
|
5223 |
"outlier_size": 0,
|
5224 |
"vector_lens": [
|
5225 |
-1,
|
5226 |
+
4
|
5227 |
]
|
5228 |
},
|
5229 |
"model.layers.9.mlp.down_proj": {
|
|
|
5247 |
"outlier_size": 0,
|
5248 |
"vector_lens": [
|
5249 |
-1,
|
5250 |
+
4
|
5251 |
]
|
5252 |
},
|
5253 |
"model.layers.9.mlp.gate_proj": {
|
|
|
5271 |
"outlier_size": 0,
|
5272 |
"vector_lens": [
|
5273 |
-1,
|
5274 |
+
4
|
5275 |
]
|
5276 |
},
|
5277 |
"model.layers.9.mlp.up_proj": {
|
|
|
5295 |
"outlier_size": 0,
|
5296 |
"vector_lens": [
|
5297 |
-1,
|
5298 |
+
4
|
5299 |
]
|
5300 |
},
|
5301 |
"model.layers.9.self_attn.k_proj": {
|
|
|
5319 |
"outlier_size": 0,
|
5320 |
"vector_lens": [
|
5321 |
-1,
|
5322 |
+
4
|
5323 |
]
|
5324 |
},
|
5325 |
"model.layers.9.self_attn.o_proj": {
|
|
|
5343 |
"outlier_size": 0,
|
5344 |
"vector_lens": [
|
5345 |
-1,
|
5346 |
+
4
|
5347 |
]
|
5348 |
},
|
5349 |
"model.layers.9.self_attn.q_proj": {
|
|
|
5367 |
"outlier_size": 0,
|
5368 |
"vector_lens": [
|
5369 |
-1,
|
5370 |
+
4
|
5371 |
]
|
5372 |
},
|
5373 |
"model.layers.9.self_attn.v_proj": {
|
|
|
5391 |
"outlier_size": 0,
|
5392 |
"vector_lens": [
|
5393 |
-1,
|
5394 |
+
4
|
5395 |
]
|
5396 |
}
|
5397 |
},
|
generation_config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"attn_implementation": "flash_attention_2",
|
3 |
"bos_token_id": 1,
|
4 |
"cache_config": null,
|
5 |
"do_sample": true,
|
|
|
1 |
{
|
|
|
2 |
"bos_token_id": 1,
|
3 |
"cache_config": null,
|
4 |
"do_sample": true,
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edecd3fe1538efdde1a1599c3171aa7f05a5daf57710a22e2b44bcc874101ac7
|
3 |
+
size 2967632296
|