diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,78047 @@ +{ + "last_module_idx": 82, + "measurement": { + "lm_head.linear": null, + "model.layers.0.mlp": [ + { + "accuracy": 0.9146036725295217, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.926093434032641, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9226845314628199, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.923060981850875, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830354922696164, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846627304428502, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857256710529327, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906071902889955, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936510998951761, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930994400852605, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947583020517701, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970879817479535, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971804532565569, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982006251811981, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984137277658048, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987566984797779, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992322343352594, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.0.self_attn": [ + { + "accuracy": 0.8902142424332469, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9075412624760678, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9193279304002461, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9443372738988776, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503695745217173, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9518840156103435, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9650886905820746, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9661802774981448, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696553851428785, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9716460830286929, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764224228106047, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780935002000708, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796152240351627, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812940531655362, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884948612828004, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904803100385164, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906667557201887, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941206990103972, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974456676527074, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9808326410619836, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813314550801328, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844632054630079, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854968582328997, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903440146069777, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911089500314311, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925256645993182, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99496531133589, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954382922304305, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950929907591719, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957021172893675, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974987495102381, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978583186472717, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986437109151953, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987256181867499, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990082422369405, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996334621682763, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.9803257145379719, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809459385118986, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837873170250341, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869185552785271, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877700060606003, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895298402560385, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892894374696832, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911264636014637, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933320879936218, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993585211273871, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943512105628064, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952199133603197, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946440693579222, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957159040005583, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968430750856274, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980176012766989, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971417970955372, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991132987565116, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993186767556166, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9565715507457131, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9579362084991053, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9651870821651659, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9673285390201368, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977991742523093, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798771205701327, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828752702788303, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884562006122187, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894577664764304, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887122151098753, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901579626296696, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994212607019826, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950745921385916, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968106223172263, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970387136073489, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977118733682131, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991458052475202, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9678778773859927, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.968979321028057, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717913489592703, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796436774103265, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837091565132141, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840335610665774, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989910677075386, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903162341368826, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890433879275071, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902301611084687, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916346112364217, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922925178941927, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927690319324795, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993398799708015, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959352392899362, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966346052916426, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997216058796958, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980914771164718, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991404917091131, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.9566346532420108, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9579700482519049, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649937027379086, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670185697706122, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780616368118086, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800019766155043, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828334544834337, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885100385076121, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895417149129667, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887194241348066, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902074258578452, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994207805708835, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995095882760851, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968155489156121, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970171271186126, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976565528072809, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999150249910982, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9671865011516371, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682522510227404, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9707271488089311, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776127307038558, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822284704760501, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826375716610959, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876322260028437, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881160745495244, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900295781461816, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897178191887704, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915468669251392, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991921507214245, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923238573889983, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927519724557274, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957240878751403, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964727037831357, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968908795792806, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978229099590528, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991362777195478, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.9569312710511058, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9582051603417647, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649120694712588, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668933655086317, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781355450027868, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800489717408231, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828519899594156, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885261803865433, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895351442851519, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887768908550865, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902249684459284, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942347587723481, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951042050593778, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996818973241668, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970529087279972, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976669112710577, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991342737094352, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9684976057002419, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693810814305356, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722787737846375, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789447878536425, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829498121612951, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834213837196952, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878154538179699, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884170905539864, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902055153721258, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908368642392912, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917238237042176, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924568596639132, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927834719419479, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932980607998999, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959445917292645, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966345174532187, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969952024127308, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977936774100128, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992118314104644, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.9552545547485352, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9566639630418075, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634791204803869, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.965497120430595, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773712722878707, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793075777982411, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982183857967979, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881337112502048, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891702391599354, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883948313562494, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898852611842909, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940209882824045, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994932809942647, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967007288023045, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969378200016523, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975629729267798, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991015727190595, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9637941749472367, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9648083228813975, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9677952120178624, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744153697239725, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808228784485867, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812018118406597, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986068805581645, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864057694610796, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888477584249095, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898571034795359, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909018474189859, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913577868750221, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914683472169074, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927780047843331, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951766392118052, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961675323153797, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963177301381764, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975939885173973, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990940727293491, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9536153329046149, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9550843395684895, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9622548598992198, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9644144961708471, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764667357269087, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784872673059765, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814996954641844, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875849093261518, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886357658787778, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987860475715838, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894393047219828, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937730364893612, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947063754263678, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965293113338319, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968045332321995, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974551989059699, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990439772801963, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9624935765015452, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630165821627567, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9652109146118164, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706897704224837, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981150558120326, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806588260751021, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985657839398635, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846777351279008, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850424418323919, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863854756480769, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908363662267986, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908118146030527, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912829767716559, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914964766878831, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951866382831022, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960481332320916, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961413497987547, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977787097817973, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989197975710818, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.9522772274519268, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9538210755900333, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9614703278792531, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638475587493495, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757371293871027, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778157190272683, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810886336000342, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871332739528856, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882602840662003, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875600918343193, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891508394166043, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993624731506172, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945690710293619, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964487191877867, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967465265408942, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974604129399124, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990482456786068, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9564402134794938, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.95717832289244, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9601790152098003, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9658156319668418, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808165776102167, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810132180389605, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860316797306663, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862324360169863, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862103917096791, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867930286809018, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898000665401158, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905018226096505, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990569396238578, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914349380292391, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945292602244177, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953243626575721, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995510673444522, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972045545123125, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988978111038083, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.9494576297308269, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9511257127711648, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593731980574758, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9619873291567752, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742412567138672, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764359028715837, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979992505751158, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862711257056186, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874864385316247, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867882759947526, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884732122484007, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932291735159723, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942312656264556, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962107370558538, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965470672438019, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973325721527401, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989763000293782, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9559140393608495, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9570866164408232, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9581218989271867, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.963792549936395, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795843566718855, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798285772925929, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858072447149377, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860285379384693, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986721761916813, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871267001879843, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893678230674643, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880651761042444, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885315330404985, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892390550751435, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945546321963009, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956295709860953, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956544912175128, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973158773623014, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988779086423548, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.9451589835317511, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9470729451430471, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9560409313754031, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9589567968719884, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721072316169739, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745389521121979, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783763163968137, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850065676789534, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986382865592053, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857141563766881, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875618423286238, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926838223871431, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937740525132731, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958668935455774, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962818908848261, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971377477049828, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988851202161688, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.9557533326901888, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9574560240695351, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9611788454808687, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9698379416214792, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977645257585927, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779580564875352, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851778698594946, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854776467147627, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985942321388345, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868116943459762, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883516375955782, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892636584608179, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897905666577188, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908500078477358, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943342342188484, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952974629245306, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957760097949129, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972588951258283, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988742664848503, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.9433004730626157, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9452723264694214, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9546518294434798, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9577015261901052, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712338855392054, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.973720919144781, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777472176049885, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845929020329526, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859833529120997, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852786001406217, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871776637278105, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924658364371249, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935826721944307, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957475901434296, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961692628107572, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970488328682748, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988276190859707, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9559288966028314, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9578706935832375, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9617280646374351, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704369149710003, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767923590384031, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977366201187435, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833401645484724, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840482206721055, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868202931002567, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875387063151911, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882459358165139, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98956551049885, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894162455671712, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910972745794999, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940054483319584, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955604762623185, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952368885278702, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997437832779006, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998660164542104, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.9428346909974751, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9447833299636841, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9540879726409912, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.957111982922805, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9710174202919006, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735181959051835, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775311774329135, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845260538552937, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985895972502859, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851573376279128, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870983848446294, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924065839303168, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935510060504863, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957101054881748, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961443249332277, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969912359589025, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988322365832957, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9542441399473893, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9554875963612607, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9595493768390856, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9671793799651296, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975263365005192, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771396988316586, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809197752099288, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832866772225028, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866144076773995, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987097608415704, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880621848922027, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897930912281337, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891719465192995, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907999744540766, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934846986281244, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952719274320101, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945642579542963, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997270013352758, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985763410988607, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9618985621552718, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9614388472155521, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593016066049275, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9596736054671438, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784508033802635, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884568719487441, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892690981689253, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826220336713289, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902828531829935, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874366710060521, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993007201113199, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955861537080062, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958217065585288, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99593664548899, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998423767717261, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986168913739292, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988392429720414, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9672725953553852, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683106823971397, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9748860754464802, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780709476847398, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811634233123377, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841658880836085, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982715346311268, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861032068729401, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900108513079191, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903992367418188, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917273772390265, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993816944721498, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922312058900532, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942626466876582, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995128780211273, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970098449603507, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953941196987504, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987751442546907, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988421492866779, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.9435623256783736, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9453683401408949, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.954577270307039, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575854320275157, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9714670244016146, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738306967835677, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778377460805994, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984831806860472, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861813736589331, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854520135804227, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872908349099913, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925716487984908, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936461813355747, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995852880179882, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962222325174432, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970505004258532, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988640684046244, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9608393907546997, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9619483383078324, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663774967193604, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736068687940899, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.978788015089537, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798232442454288, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836536959597939, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848728572067461, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878081820513073, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885236315037075, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989837048869384, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991032915680032, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906421946851831, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918595748512369, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944599655113722, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958739139531788, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954217586078142, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976351447403431, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987669748891341, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.94247389467139, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9442662188881322, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9535060117119237, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9565237948769018, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708639195090846, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732448929234555, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772853380755374, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845615656752336, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859393584100824, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851392256586176, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870143171988035, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924237053645285, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993517807047618, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957976533394111, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961516472854113, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969893678238517, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988517612218857, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9642854050586098, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9653718785235756, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.968754956596776, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758266304668627, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813180408979717, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818955393213975, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865019007732994, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871675207426673, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890647334487814, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902386579074358, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907748738401815, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991699597553203, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914333592904242, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992898724581066, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951651076737204, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996348279087167, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961785348622423, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977487758978417, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989666623112402, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.9394302870097914, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9413441105892784, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9509604824216742, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9541104530033312, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9692219401660719, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718218979082609, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760446642574511, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836283106552927, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851093433405224, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842315488740018, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862906901459945, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919252105449375, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931416856615167, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995472199822727, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958707435350669, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967350189230944, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998741736145396, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.9613742138210096, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.962896422335976, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9669490738918907, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.973765630471079, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795761720130318, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799875789567044, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842538755191, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847013071963662, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874638444498965, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879561019571204, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898600797904166, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910720335809808, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907558764282026, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921262993624336, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944405281230023, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959816383688074, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952289760112762, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977231757029107, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987550286674186, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.9370449655934384, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9389941817835757, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9488463872357419, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9520800050936247, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679587483406067, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706666155865318, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750195992620367, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829147960010328, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984461596137599, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836233242561943, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857338823770222, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916112415100399, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928603940888455, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952836040603487, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957292785770014, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966281879889337, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986932183566847, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.9665313614042181, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678907802230433, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709079045998422, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770663113970506, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814375764445255, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832873814984372, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857716183913382, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881167380433333, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897739965664712, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901261368864461, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913568073197415, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921477068411676, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921842562524896, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929926818922946, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952705875039101, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964449503704121, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961631831369901, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981026406350889, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989331023473489, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.9374911785125732, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9393593072891235, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9489380936873586, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9520200961514523, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682941091688055, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708388981066252, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750879752008539, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832308872749931, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847252337556136, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838446457135049, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985851736445176, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917803006736856, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929429198566236, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995451105268378, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995825793398054, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996683941859948, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987597090045088, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9619279974385312, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634955054835269, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9662432545109799, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9719956328994349, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791069736606196, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812507033348083, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833630041072243, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861891473594465, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880018791085795, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886627863896521, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902322833475313, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914128458813617, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916550368070602, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919691744603609, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950857225217318, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960754011806688, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959626864445837, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978010299566545, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989419247170812, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.9383025420339484, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9401164870513112, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9490489583266408, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9518984901277643, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687281345066271, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713178433870014, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753268110124689, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834712806500887, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850003985982192, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840585903117531, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860542087178481, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918804694163171, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993044207754888, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955072653921027, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958720909137475, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966705045417735, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987521651936205, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.9647987171223289, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9680633356696681, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9714141927267376, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775517441724476, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815655538910314, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837308008419839, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858512972530565, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882415407582333, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896397684749804, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904878884553909, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919308094601882, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926310338472065, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927888496925956, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993489229365399, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956958133139109, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966600560828259, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964553227550105, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982261514585269, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990190394026669, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.9388229533245689, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9405575614226491, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9492792267548411, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.952055877760837, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688584145746733, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713733259000277, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752491759626489, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98364845859377, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985104153030797, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984143498696779, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986110635493931, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919280825476897, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930779667277085, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955700812371153, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958978616877606, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966643695768557, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987950395596655, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.9718871681313765, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732925421313235, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760848111227939, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808485335425327, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840332724546131, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855895905118239, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877563730666512, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897383051483255, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906937291747645, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913541580501356, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925815431695235, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931665141331522, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933503152508485, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940078317334777, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960085714333936, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970559144490644, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966776774902093, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984863057340446, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990665073457518, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.9378668258064672, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9396259408248098, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9481801923952604, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.950898267720875, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.968343806894202, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709175950602481, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747232684963628, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834101215789193, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848781055525729, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838801402794687, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98589510352988, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991794121108557, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929633062136801, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954986462467595, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958208485653526, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965668999050793, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987657870116987, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.9720996869237799, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9741642004565189, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777222150250485, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815778967581297, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984770445447219, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859888883013475, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880799227639249, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896134232219896, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914727556078058, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916828651177255, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928733533934543, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935551756306699, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993726788774917, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944021168508028, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963046167241899, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972309646637816, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968760209648233, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985913369608553, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991332553327084, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.9361885095897474, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9379724452370092, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9465875311901695, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.949328507247724, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674614103216874, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9700896739959717, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739310443401337, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829001003190091, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844470557413603, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834453855690203, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854970762604162, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915779687856373, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927660673856735, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953738882353431, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957212672421807, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964777666487192, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987332859125576, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.9661478180634349, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9680281940259432, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717060296159041, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9774265634386163, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982243719853853, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831194265892631, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98635775164554, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874325418158582, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886904202009502, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898544910706972, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913365048797507, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921374775861439, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919011835989199, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930000901222229, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953183629795125, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964407388316957, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961266156874204, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980419538915157, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988968205687246, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.93642468828904, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9381574078610069, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9465309444226717, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9492019069822211, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9675370548900805, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9701958988842211, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739087302433816, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830061385506078, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845405785660994, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834866131606855, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855505071188274, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915965340639415, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992797065722315, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954009499204787, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957124504603838, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964354681341272, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998736264772321, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.9676822580789265, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706133321711892, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738945976683968, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786307043150851, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828416360052008, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844427296989843, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865858272502297, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885413231034028, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903273151109093, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990753845948922, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919175991886541, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928857268471467, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930532119776073, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938518452016931, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956653314201456, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996839805261085, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962609688702383, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981470255083159, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990488351567796, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9807592646071785, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813143140391299, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984491594527897, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854990432136937, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902568341870057, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991042255571014, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924485659913013, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949098809769279, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953776605819401, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950311964279727, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956557460521397, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974666329983034, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978330160834288, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986228875227665, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987082446092054, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990026909661921, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996347319040644, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9887831328730834, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895607907521097, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912725875252172, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931462571809166, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942719575605894, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994694961529029, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954774422865165, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959801521740461, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996314788335248, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965109997674039, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971434626924364, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974829648670397, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974471709053767, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977499111310432, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985629299557522, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987837192847541, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988010064943841, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994412829217157, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996901072659775, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9352090546959325, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9370529212449726, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9452855963456004, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9479547425320274, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9669776998068157, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696682252381977, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733174756953591, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826484118637285, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841878210243425, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831892973498294, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852916912028664, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991456853715997, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926685063462508, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953049546793887, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956505012355352, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963645660563519, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986952136417753, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9659312932114852, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732574726405897, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776333852818138, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809478850741136, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855105265190727, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855275530564157, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884415096358249, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886953344470576, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903823090227026, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911009261482641, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929367852838415, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933310598134995, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935287648909971, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940674399074755, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962409668062863, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971673565083429, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967050724907925, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985459157706875, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991469514605246, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.9341773296657362, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9360576742573788, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.944356033676549, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9471418543865806, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663857096119931, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9691131585522702, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728449959503976, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822854776131479, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838720183623465, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828900067429793, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850147617490668, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912893756439811, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925252465825332, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995196387956017, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955651787550825, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963051386569676, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986697859866055, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.9673204359255339, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686267438687777, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720036073734886, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9774690348851053, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824775065246382, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837338077394586, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867079195223356, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882496415000213, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897402007328836, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901459491566608, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916224934552845, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924087108750093, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925031544346559, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933003571472669, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956447191928562, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966266167006994, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964465298934987, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980971530864113, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990746888675188, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.mlp": [ + { + "accuracy": 0.9335096886283473, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9354397874129445, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.943655296375877, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.946466747083162, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9660387133297167, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688058181812889, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725331162151537, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982020649470781, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983647639814176, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827006898428264, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848617613315582, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912028861673254, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924511329123848, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951314224224341, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955234978544084, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962653086373681, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986383379682114, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.self_attn": [ + { + "accuracy": 0.9719625962407965, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733570437682303, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771333242717543, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980706302743209, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844516296135751, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985850309070788, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879308954665535, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989612349554112, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910880060572373, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913769756492815, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925579075750551, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934133353986239, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935140029380196, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943632883460898, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995978970276682, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971577909431959, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996485388200534, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984668725611348, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989494516661293, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.mlp": [ + { + "accuracy": 0.9306512004450748, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9327216838535509, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9414711312243813, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9444206388373124, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9646330224840265, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9675280639999791, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9714524055782118, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812750377153096, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829560267297845, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819941301094858, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842470178478643, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908479177638104, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921427682826394, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949251511379292, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953438282797211, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961257482829847, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985874090343714, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.self_attn": [ + { + "accuracy": 0.9663940323026556, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.967502844961066, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724092765858299, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778510049769753, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821917477406954, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829479189295518, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987065641503585, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988041069946791, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893831940073716, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899332084153828, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914527237415314, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920336235510675, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923944622278214, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993068299795452, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957968078945812, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966086019810877, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966427770100141, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998163500310559, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990967380765238, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.mlp": [ + { + "accuracy": 0.9309952949222765, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.933068639353702, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9417006091067666, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9446627842752557, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9647692849761561, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676702524486341, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715544110850284, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813216250193747, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982997715473175, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820375458190316, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842890877472726, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908521402823297, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921665481830898, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949287301615665, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953413644903585, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996122479046646, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985888136648818, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.self_attn": [ + { + "accuracy": 0.9550836776432238, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.958030568925958, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624329240698564, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.968410379008243, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779520238700666, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.97918900847435, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828225891841086, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844485050753543, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868840457577455, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869980317981619, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988448147711001, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903840265775982, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899111986160278, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914171962361586, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938866735288971, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995780537002965, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947693108728057, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975424750070823, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986745144583677, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.mlp": [ + { + "accuracy": 0.9279767149373105, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9302951160230135, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9390550161662855, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.942192554473877, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9632417747848913, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9662742583375228, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9702680644236112, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803514308051059, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821373616394243, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812754251455006, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836239689274838, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904668613484031, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918264378058282, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946602653515967, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99514906892651, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959642345968046, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984969322226549, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.self_attn": [ + { + "accuracy": 0.964866042137146, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682286883655348, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9723897482219496, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773834300668616, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982017518658387, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983099932733335, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862445106631831, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874100591007032, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887479433887884, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892165284407767, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913568167310012, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921488848171736, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922498684180411, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932102545311576, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951920238764662, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964972208989294, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958775494443742, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978924934801302, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998929121776631, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.mlp": [ + { + "accuracy": 0.9232648924777382, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9256026431133872, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9351558246110615, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9386178004114252, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606387897541648, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639326208516171, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683486536929482, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789203609290876, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808507545998222, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979873200780467, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824171034913314, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897153252049496, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912122762516925, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942555058943597, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994752981945088, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956894422832289, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983881697254745, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.self_attn": [ + { + "accuracy": 0.9557132752318132, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9565050068654513, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.962658377070176, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693853353199205, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781936062009711, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787191105516333, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838946932240537, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844804296368047, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985106381930803, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850002793889296, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886578462625805, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902314896646299, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899273569646635, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915204275595514, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940714126354769, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951879978179932, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950190311984012, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969529697769567, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986535586220654, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.mlp": [ + { + "accuracy": 0.9154049722771895, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9185097343043277, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9289309853001645, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9325643401396901, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9564703075509322, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9604757961473966, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9652861168510035, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767286918665233, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787665119296626, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776854326850489, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980651656263753, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885206991120389, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903011635730141, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993491959414984, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941225659690405, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99512146530967, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981640491046404, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.self_attn": [ + { + "accuracy": 0.9498044189653898, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9516809582710266, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.958022559943952, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682356777944063, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9740963531167883, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745617603000841, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832422184316736, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836871608307487, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855864926388389, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862627104709023, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871603195604525, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880982072729814, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891135261247033, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900825282460765, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938911081928956, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948738593804208, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955475714645887, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970816701258484, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987492339783594, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.mlp": [ + { + "accuracy": 0.903095891601161, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9073334681360345, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9173680920349925, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9210327110792461, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9499946456206472, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9554639364543714, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600849496690851, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.972636668305648, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750199772809681, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9741810183776053, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777927743761163, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864441956344404, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886971225864009, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917229139491132, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931686300980417, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941692379744429, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976976482491744, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.self_attn": [ + { + "accuracy": 0.94539727662739, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9557693067349886, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.960969466912119, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676955812855771, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755731726947584, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763093700534419, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825342877915031, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832354407561453, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853370456319106, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858393747555582, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880265656270479, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890950980939364, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894867053157405, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903320332891062, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937041993988188, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950928013575705, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948275689231721, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997283258916516, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998585331988962, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.mlp": [ + { + "accuracy": 0.8804321790996351, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8856545498496607, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8959678850675884, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9008125066757202, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9387843797081395, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9445681634702181, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9491331514559294, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9661927285947298, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693506109087091, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9685409257286474, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728050796609176, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983763043817721, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862340531851116, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903186394980079, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916336410924008, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928481649411353, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996891430334041, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.self_attn": [ + { + "accuracy": 0.9535439390885203, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9556328371951455, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9611960461265162, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.970795612586172, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785723968556053, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790178882448297, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855449701610365, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860322647973111, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875878903426623, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870416741622122, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893426173611691, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898676119352642, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906024626995388, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913182752697092, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947315132931659, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953810298129132, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959881988010908, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973575517927346, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986968495343861, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.9755456039780065, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762898918829466, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804606751391762, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817438031497755, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876392962116944, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886461535566732, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904521099830929, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935421018224013, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941252136701032, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993700523125498, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944902611406226, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967886017341363, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972524601770075, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982514326509676, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983600166283155, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987396498847949, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995343245958027, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9841338443128687, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98472272408636, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876134513240111, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904595088017615, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914594182842656, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919744753523877, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993182012909337, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937822375642625, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994538785203507, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945863897078916, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958026013091991, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961148680824983, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961050521386298, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964524134993553, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977376153202433, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998345598774521, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980424462180388, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992005865726816, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994804143513504, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9700671277548137, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709770616732145, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760693892052299, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776257056938974, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848520065608778, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860844125873164, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882879625809821, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920641638730702, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927906048925299, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922860123609242, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932525691233183, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960707186868316, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966358005216247, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978560640623695, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979957797025379, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984597981368241, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994314732030034, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9846545740177757, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852262719681388, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872581127442812, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903780308208967, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919226334283227, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921528830340034, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939291010561743, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941700987125698, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951227021060491, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954372209153677, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959556350582525, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964209826369035, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996420894014208, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968367503269723, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979888373299649, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984286470632804, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998391732965645, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991392879501769, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995678352976316, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9645047940705952, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9656393716209813, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717388372672232, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735803729609439, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821293636372215, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835792325044933, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861744406976198, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906421382176248, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914799716911817, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908924220423949, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920326792880109, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953616610483119, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960308572963664, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974586091151363, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976357390221796, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981787528254484, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993284247619542, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.9798698472349268, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806957542896271, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982775866985321, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873434493416234, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896603885449862, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900021537354118, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927409557919753, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931755128659701, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936676766527327, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939827428836572, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948151107681426, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951178713848716, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953631719476298, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956139330016939, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974096935046347, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997836991360313, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980323265649771, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987858836784175, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994791876034517, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9611692961893583, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9623789912775943, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.968944185658505, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708966048140275, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804543460670271, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820405464423331, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848058333522395, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898094056468261, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906901254465705, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900293734512831, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912760249878231, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949145183751458, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956493150246771, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972278448311906, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974011998427542, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979887869405119, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992648460167018, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.9758893565127724, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768864528128975, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792004980539021, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848376669381794, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874854566235292, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878244854901966, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916345233979978, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920117172755694, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928674462594484, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992972698650862, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937880941127476, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943162202835083, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945946939681706, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951028090558554, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99702786085637, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997521376923511, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978562571892613, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986202422725526, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994195588423234, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9584409374939769, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9596955933068928, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9666724957917866, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687803136674982, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789637735015467, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806931677617525, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836569757837998, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890010411802092, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899656733399943, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892532809784538, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990604610035294, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945166479600104, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953101155789275, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970023584993262, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971988767777619, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978358449512407, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992052376466362, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9731117832033258, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745758834638094, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977521665786442, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835119780741239, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860691898747495, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864617539079565, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904349312970513, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908431478236851, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919336775415822, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923853003665021, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932908455007955, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938535372677603, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941174874180242, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994398260979276, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966611277900244, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972655196723185, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975839527814012, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984499205295977, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993365132494977, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9582830385157937, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9595687420744645, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9664769235410189, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9685635660824022, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785271851639998, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804815963694924, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834024325797432, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887844143729461, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896051593517002, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889225677440041, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905055973090624, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994462293229605, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952420313891611, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969492982092657, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971616064247332, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997799213975668, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991558799029965, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9680912243692499, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9692194367709913, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.971783110969945, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787669072025701, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833194315433502, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837150385505274, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988787074622355, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989359123142142, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900832787940377, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907121321088389, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913882495541322, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922623799035424, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924699789599368, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932598902990943, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957555787343728, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965948427194044, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968271004526239, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980352866022211, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990741423282185, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.norm.norm": null + } +} \ No newline at end of file