{ "measurement": { "model.layers.0.parallel_decoder": { "attn": [ { "accuracy": 0.930476050627859, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9346898856915926, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9432834951501143, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9632907101982519, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9668777334062677, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9674946979472512, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9767721687492571, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9774363291890997, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.979384919530467, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9805689626618436, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9836139208392093, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9846697355571546, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9855234983720278, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9867480005088606, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920476188785151, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9933078775280401, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9939453260678994, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9959760567075328, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983032490862044, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.8982605934143066, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9017061873486167, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9232168511340493, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9328055632741827, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9545072756315532, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.95797388177169, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9691927150676125, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9748556488438657, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9776206534159811, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9777718311861942, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9803577253693029, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.988434352372822, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9901311695575714, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9929431104346326, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9943008113064264, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968741538101121, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9977550990879536, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.1.parallel_decoder": { "attn": [ { "accuracy": 0.8054671789470472, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8142284468600625, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8439445621088931, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8951081225746557, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8985365378229242, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9034115264290258, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9227036802392257, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9280918711110165, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9382350946727552, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9403243190363834, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9495705554359838, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9565363181264777, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9538019707328395, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9611338910303617, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9726919061259219, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9796807201285112, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9773897356108615, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897048865493975, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9941640868782997, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.9780847038093367, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9795633476031455, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9800966303599509, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9802395073991073, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9909735823932447, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9954551062301585, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9957140868431643, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9924163049773166, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9968705375335718, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9976172635429784, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9961397118474308, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.99885712005198, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9988522600186499, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9989749216719678, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9991864535565439, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9992293318439471, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992796059109663, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.2.parallel_decoder": { "attn": [ { "accuracy": 0.7833522495470548, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7890413183914988, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8132132981952868, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8766252116153115, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8906653680299458, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8930963089591578, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9265656157543785, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9296666070034629, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9366829771744578, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9393464828792372, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9461647334851717, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.950099345884825, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9511282036178991, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9556602741542616, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9729850103980616, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.977784840684188, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9804989538694683, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9890661404321068, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9949002897268847, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.7771100495990954, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7820017965216386, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8242902253803454, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8371448014911852, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8905197695681923, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8987390367608321, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9166588971489354, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9436170050972387, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9487704258216054, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9446885146592793, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9514188452770835, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9719201326370239, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9758394799734417, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9849628994339391, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.985749004702819, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9893210655764529, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9959740936756134, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.3.parallel_decoder": { "attn": [ { "accuracy": 0.7439806586817691, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7531795501708984, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7859361297205875, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8589457963642321, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8700153200249923, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8742637383310419, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9132318120253713, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9190733056319387, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9277856538170263, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9304927273800498, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9345278363478811, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9405585652903506, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9416247731760928, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.947956107164684, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9687702812646565, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.974499352668461, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.97716266230533, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9876846200541446, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940336993650386, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.7178631079824347, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7244460708216617, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7690643511320415, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7823745075025057, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8603013314698872, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8708699000509161, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8897569806952226, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9283987597415322, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9346665457675332, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9294116810748452, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9379677584296778, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9642212265416196, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9691755457928306, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9808876577176546, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9818339504693684, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9855272346421292, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9949781973111002, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.4.parallel_decoder": { "attn": [ { "accuracy": 0.7582377885517321, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7675145801744963, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8010229311491314, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8662600015339098, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.878476155431647, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8814417688470138, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9183887306012606, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9222733347039473, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.926592651166414, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.928654062120538, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9399583025982505, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9434148010454679, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.946628683491757, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9504026143174422, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9706004763904371, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752174239409598, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9781490282008523, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9883164198775041, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9944259928245294, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6957670011018452, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7026951438502261, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7516542484885768, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.766568986993087, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8485579239694696, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8601061670403731, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8811599329898232, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9223453496631823, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9292786623302259, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9234300600854974, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9327845071491442, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9611920086961043, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9666178948000858, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9792991845231307, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9803002225725275, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.984428302237862, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9945790406904722, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.5.parallel_decoder": { "attn": [ { "accuracy": 0.7561694446362948, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7650167816563657, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7978383490913793, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8559907361080772, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8733216084932026, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8776145859768516, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9086235698900724, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9142382960570485, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9211434000416806, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9230554731268632, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9377514437625283, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9409994702590139, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9438207588697735, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9472376923812063, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9683669585930674, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9737914816329354, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752646167027322, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9876813425829536, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935773289517352, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6867245874906841, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6935243104633533, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7478486613223427, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7641909247950504, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8433246487065366, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8553254227889211, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8786071476183439, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9196295612736752, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9269115862093473, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9207607193997032, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9304271559966237, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9598456589799178, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.965442695115742, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9786177810869718, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9796239169020402, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.98426616662427, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9944236337354309, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.6.parallel_decoder": { "attn": [ { "accuracy": 0.7588673390840229, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7674820548609683, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7959159047980058, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8609223114816766, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8775389570938914, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8804203962024889, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9176672948034186, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9213525684256303, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9270387887954712, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.929962578572725, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9396447758925588, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9428219418776662, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.946657331366288, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9502321544446444, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9705663731223658, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9753126768689406, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9782570208373823, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9872006303385684, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9943378948067364, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6821269487079822, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6889175364845677, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7445189576399953, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7610198071128443, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8398707665895161, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8522877442209345, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8758982231742457, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9178817334928011, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9251782078492015, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9188653356150577, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9287886054892289, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9588405678146764, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9645952864697105, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.978072757783689, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.979085806169008, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9838271533188067, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9942806069003908, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.7.parallel_decoder": { "attn": [ { "accuracy": 0.7587834910342568, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7656628709090383, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7968424872348183, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8516243633471037, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8776168823242188, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8808725884086207, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9165095718283403, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9206435617647672, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9261416761498702, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9285764192280016, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9394874259045249, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9431227759311074, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9458656436518619, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9497024636519582, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.969861808576082, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9749433131594407, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9775589610400953, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9880168179148122, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9941095470597869, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6737247517234402, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6807306691219932, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7378666024459035, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7548054895902935, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8360403336976704, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8487616338227925, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8729689874147114, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9160020602376837, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9233823763696771, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.916967981740048, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9271031367151361, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9578716284350345, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9637667411252072, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9775395958047164, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.978585454978441, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9834256281978205, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940915554761887, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.8.parallel_decoder": { "attn": [ { "accuracy": 0.7512809853804738, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7593480411328768, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7908941068147358, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8508122343766062, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.875566419802214, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8780289449189839, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9184501422078986, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9217443591669986, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9288107784170854, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9318627683739913, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9383022471478111, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9423838477385671, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9455584350385164, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9501172806087294, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9703453653737119, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.975163492717241, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9788527974956914, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.987174361159927, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994402736033264, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6709378142105906, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6780046161852384, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.73564150458888, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7526613285667018, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8343084611390766, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8472136949238024, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8715628950219405, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9150188094691226, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9225257446891383, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9160596508728831, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.926353322832208, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9574288474886041, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.963396618240758, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9773102810508326, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9783759242609927, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9832639051111121, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940816838490335, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.9.parallel_decoder": { "attn": [ { "accuracy": 0.718372897097939, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7260836300097013, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7581080386513158, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8224478269878187, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8567493589300859, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8605221698158666, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9048464360990023, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.909926176071167, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9165806519357782, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9203107168799952, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9293831147645649, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9334418899134586, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9368867497695119, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.94125108969839, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9652458052886159, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9707844131871274, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9750660846107885, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9857171475887299, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933762260173497, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6672878516347784, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6745826068677401, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7318799621180484, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7486633752521716, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.832694317165174, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8457297274940893, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.869647226835552, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9142413327568456, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.921657217176337, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9152104289908158, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9255955219268799, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9569733142852783, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9630001971596166, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9770302003935764, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9781164275972467, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9829077077539343, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939997490299376, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.10.parallel_decoder": { "attn": [ { "accuracy": 0.7339446419163754, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7411629777205617, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7729998387788471, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8369578185834383, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8643090097527755, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8679592483922055, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9126631648916947, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9175041725761012, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9231229895039609, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9260692847402472, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9328736568752088, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9371711015701294, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9405221562636525, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9455749800330714, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9677151159236306, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9725841472023412, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9773449929136979, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9864391057114852, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9938831078378778, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6657338895295796, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6729772969296104, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7312179113689221, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.748377423537405, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8313648575230649, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8445289134979248, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8691165823685496, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9134185690628855, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9210737629940635, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9145288655632421, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9250016212463379, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9566271901130676, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9627093202189395, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9768297013483549, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9779536833888606, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9828708422811407, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939477682897919, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.11.parallel_decoder": { "attn": [ { "accuracy": 0.7179902980202122, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7249249157152677, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7547630008898283, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.817019173973485, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8556631866254305, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8601378767113936, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9074208736419678, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9135855561808536, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.918959052939164, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9229228245584589, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9293851915158724, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9332508978090788, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.937223704237687, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.941717543100056, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9647792326776605, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9708494136207982, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9751581628071634, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9855343119094246, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993498483770772, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6536664460834705, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6611749498467696, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7219134129975971, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7397729723077071, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8249182073693526, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8386519331681102, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8641821083269621, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9100238837693867, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.917967250472621, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9111405360071283, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9221041641737285, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9548905397716322, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9612548006208319, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9758933249272799, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9770699237522326, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.982188841229991, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9937166340256992, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.12.parallel_decoder": { "attn": [ { "accuracy": 0.718632698059082, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.725227155183491, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7572129902086759, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8175051337794254, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8539687959771407, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8578911455054032, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9039914545259977, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.909390085621884, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9225755302529586, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9256903121345922, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9283827166808278, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9348220825195312, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9364792547727886, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9438081352334273, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9646552079602292, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9716787432369433, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9746797508315036, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9857493827217504, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933957928105405, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6643350751776445, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6714190934833728, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7288272757279246, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7456309167962325, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8300280194533498, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8451859825535825, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.869540578440616, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9114560139806647, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9207691958076075, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9144981534857499, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9251991573132967, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9565350413322449, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9627423976597033, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9765721402670208, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9778875206646166, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9827740286525927, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9937751995105493, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.13.parallel_decoder": { "attn": [ { "accuracy": 0.6941570984689813, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7014279114572626, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7372793649372302, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8003710445604826, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8419050040997957, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8457318732613012, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.894676923751831, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.8996665289527491, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9075084234538832, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9104249916578594, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9217277326081929, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9273029189360769, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9300187386964497, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.935999732268484, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.96172076777408, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9680512516122115, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9724680467655784, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9843555281036779, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.992874267069917, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6548959079541659, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6626522164595754, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.721803865934673, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7388981267025596, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8261178167242753, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8396546464217336, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8641707144285503, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9108383090872514, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9184852963999698, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9118231158507497, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9226468926981876, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9552753536324752, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9615387320518494, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9760577176746569, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9772613707341646, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9821042794930307, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9937151944951007, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.14.parallel_decoder": { "attn": [ { "accuracy": 0.7098881570916427, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7169526250738847, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7502455460397821, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8114799449318334, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.849690738477205, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.855514175013492, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9016630837791845, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9096022091413799, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9158908756155717, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9192541900433993, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9257033749630577, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9310748200667531, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9341797201257003, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9399152053029913, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9636967370384618, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.969683477753087, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9739969749199716, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9852739321558099, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9931236576092871, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6532007518567537, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6608391561006245, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7202771337408769, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7377033735576428, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8241769765552721, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8381531740489759, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8631883545925743, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9095145024751362, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9174906391846507, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9106207646821675, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9217057102604916, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9545282815632067, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9609882141414442, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.975580610727009, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9768520719126651, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9817732290217751, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9936004481033275, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.15.parallel_decoder": { "attn": [ { "accuracy": 0.6952617042943051, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.702199659849468, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7367651086104543, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7978360652923584, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8409423451674611, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.845682934710854, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8954773200185675, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9016672812010113, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9108322796068693, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9147403804879439, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9226629294847187, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9278804816697773, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9313266340054964, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9370645974811754, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9620957437314486, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9680987439657512, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9731230233844957, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.98447868855376, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.99291041571843, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6427466743870786, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6505862035249409, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.71230737786544, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7302878530401933, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8189642680318732, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8333305936110647, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8592333417189748, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.906845582158942, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9150523637470446, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9079483998449225, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9193594581202457, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9531621117340892, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9598638227111415, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.974882108600516, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9761441632321006, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.981214502924367, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933824115677884, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.16.parallel_decoder": { "attn": [ { "accuracy": 0.6855991012171695, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.692503527591103, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7300847706041838, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7936420566157291, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8375495484000758, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8417386255766216, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8950594224427876, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9005170558628283, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9080253525784141, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9112888888308877, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9194729265413786, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9251009288587069, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9286196482808966, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9348150868164866, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9612428577322709, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9673537706073961, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9728065823253832, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9840947750367617, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9927455727991304, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6468934510883533, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6548124865481728, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7158539922613847, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7337350594369989, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8210926306875128, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8353278385965448, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8609649256656045, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9077676346427516, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9159649986969798, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9090229900259721, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9203425834053441, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9537333626496165, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9603135774010106, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9751086391900715, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9764437314711119, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9814679403054086, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934413915402011, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.17.parallel_decoder": { "attn": [ { "accuracy": 0.6978263854980469, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7052231587861714, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7404862955996865, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8039253008993048, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8434507219414962, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8477957374171207, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9014269050798918, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.906421843327974, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9141046938143278, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9177780214108919, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9229056772432829, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9279881966741461, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9324554644132915, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9379526063015586, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9627249146762648, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9688914324107923, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9739385843276978, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.984753241664485, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9931739372642416, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6438412164386951, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6516251313058954, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7134927448473478, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7318632728175113, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8190425571642423, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8335279414528295, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8597868492728785, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9065273498233996, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9149638853575054, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9080617302342465, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9195284404252705, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9532857631382189, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9599527716636658, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9748658130043432, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9762602244552813, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9814796275214145, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933935286183107, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.18.parallel_decoder": { "attn": [ { "accuracy": 0.671595297361675, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6793177253321597, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7172083603708368, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7862956398411801, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8311587258389121, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8349858083223042, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8924645122728849, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.8973464275661268, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9059806560215197, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.909938981658534, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9155610109630384, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9216444743307013, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9251434614783839, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9318187613236277, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9594659554330927, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9657821372935647, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9715343870614704, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9833813513580122, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9924392441385671, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6468835880881862, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6547127773887234, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7141636798256321, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7315892169350071, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8210647984554893, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8353197825582404, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8601874426791543, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9078541805869654, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9159034929777446, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9090402941954763, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9203429410332128, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9537127519908705, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9603112308602584, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9750853055401852, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9764415574701208, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9813183373526523, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934020842376509, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.19.parallel_decoder": { "attn": [ { "accuracy": 0.6805418667040373, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6886289245203921, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7274316486559416, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7925176871450323, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.83540341728612, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8398427837773373, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8935697706122148, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.8991454651481227, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.907473739824797, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9109743770800138, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9188410357425087, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.924725701934413, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9281048461010581, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9345840654875103, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.960260620242671, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9672212412482813, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9715154108248258, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9840428405686429, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9926878543276536, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6377939174049778, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6456151008605957, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7059790460686934, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7239156271281995, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.816133135243466, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8306485100796348, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.856142244840923, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9052016609593442, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9134922090329622, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9065667014372976, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.918071050392954, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9524887486508018, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.959225130708594, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9744652697914525, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.975850909948349, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9808623680942937, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933045004543505, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.20.parallel_decoder": { "attn": [ { "accuracy": 0.7061735956292403, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7143364705537495, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.748707796397962, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8112069556587621, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8494641780853271, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8532492236087197, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9070426539370888, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9118809260820088, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9190488990984464, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9228364040977076, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9258397692128232, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9309038739455373, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9356003372292769, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9410173014590615, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9643724968558863, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9702965585809005, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9754654451420433, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.985130184575131, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993586673940483, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6484101195084422, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.655958050175717, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7151155722768683, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7331751773231907, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8211115034003007, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8353659730208547, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.860693705709357, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.907419386662935, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9156952782681114, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9090915416416369, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9203660550870394, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9537728177873712, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9603564958823354, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9750957489013672, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9765240408872303, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9816355940542723, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934312120864266, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.21.parallel_decoder": { "attn": [ { "accuracy": 0.7068953765066046, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7145255992287083, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7499588163275468, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8082599388925653, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8488383418635318, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8527624356119257, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9032812244013736, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9084178397530004, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9151785938363326, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.918166016277514, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9256495111867001, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9304656856938412, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9350547665043881, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9400884916907862, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9640813971820631, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9699572544348868, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9744956038500133, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9849204436728829, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934297690266057, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6474821943985789, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6549928062840512, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7135251195807206, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7313607366461503, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8210983778301039, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8350292005037007, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8600967432323255, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9075790204499897, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9157085606926366, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9092223518773129, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9203121473914698, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9539084152171486, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9603554135874698, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9752355123821058, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9765941861428713, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9815406360124287, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934380207406847, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.22.parallel_decoder": { "attn": [ { "accuracy": 0.6802591524626079, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6892588765997636, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7258530917920565, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7915127026407343, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8359752203288832, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8402029464119359, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8956681364460995, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9009185464758622, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.908083250648097, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.911775225087216, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9184517358478747, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9237329646160728, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.927893274708798, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9338350797954359, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9605803866135447, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9669328614285118, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9722294211387634, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9836869678999248, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9927225630534323, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6423374477185702, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6497887561195774, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7088003409536261, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7268202179356625, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8180151236684698, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8322316721866005, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8575304432919151, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9060312070344624, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9143113713515432, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9076379035648546, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9189251473075465, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9531030529423764, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9596390630069532, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9748123586177826, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9761764375787032, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9812324501966175, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933254969747443, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.23.parallel_decoder": { "attn": [ { "accuracy": 0.7023843213131553, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7105254625019275, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7438932970950478, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8060823239778218, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8486545964291221, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8526804698140997, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9068531739084345, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9122600116227803, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9186277452268099, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9220818532140631, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9254543342088398, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.930153815369857, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.935046208532233, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9401136699475741, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9642206430435181, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.970048261316199, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9756380241168173, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9849307537078857, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993643997923324, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6497838873612254, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6570531694512618, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7151448099236739, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7334323180349249, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8216909232892489, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8356796816775673, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8609853167282908, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.907563328742981, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9158918983057925, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.909487103161059, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.920589804649353, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9540174415237025, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9604685400661669, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9752338434520521, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9766687876299808, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9817771629283303, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933963347422449, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.24.parallel_decoder": { "attn": [ { "accuracy": 0.6877906698929637, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6960381708647075, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7287719877142655, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7963865054281134, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8391010886744449, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8426417426059121, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.900321690659774, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9049971605602064, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9127865276838604, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9165159401140714, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9204322036943937, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9258557495317961, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9303170505322909, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9362363752565885, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9618240939943414, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9680381981950057, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9740673413402156, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9838337192409917, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932165287042919, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6557153902555767, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.662793059098093, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7197213925813374, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7374694472865054, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8249223985170063, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8385555367720754, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8632093103308427, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9094087575611315, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.917472713872006, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9111559641988654, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9220142427243685, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9548822829597875, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9611882002730119, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9757499239946666, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9770931727007816, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9820285878683391, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935130814188405, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.25.parallel_decoder": { "attn": [ { "accuracy": 0.6610032884698165, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6707554867393093, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.71793616445441, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7824530601501465, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8259977666955245, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8304378735391718, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.90048853347176, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9052380825343885, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9120520917992843, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9159143284747475, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9141299787320589, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9201742598884984, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9295224516015304, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9349449935712313, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9616463717661405, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9676007722553454, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9743736982345581, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9839910817773718, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933827699799287, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6535754705730237, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6606721627084833, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7170025925887258, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7345888238204152, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8233074138039037, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.837216013356259, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8616831930060136, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9086712473317197, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9168168118125514, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9101721926739341, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9213179349899292, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.95413507285871, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9607752938019603, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9752895298757052, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9765821209079341, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9813515951758937, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929244298683969, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.26.parallel_decoder": { "attn": [ { "accuracy": 0.6676344369587146, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6778668102465177, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7238689221833882, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7884891409622996, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8300758913943642, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8341508539099443, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9021105076137342, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9059187864002428, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.912578821182251, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.916488948621248, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9159976243972778, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9213577948118511, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9308970100001285, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9362911111430118, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9623222225590756, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9681965614620008, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9747492639642013, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9838876237994746, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933893060997913, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6413868853920384, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6487203899182772, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.706214427947998, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7239583918922825, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8169806380020945, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8312343923669112, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8562386914303428, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9055894738749454, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9139703323966578, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.907133096142819, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9184860869457847, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9528976804331729, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9594670345908717, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9747800529003143, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9760942694387937, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9810410847789363, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933292097166965, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.27.parallel_decoder": { "attn": [ { "accuracy": 0.648330387316252, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6611553242332058, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.711845222272371, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7783306523373252, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8259938265147962, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8301137497550563, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8978912391160664, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9022532199558458, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9090211705157631, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9131063473852057, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9135482373990511, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9190252580140766, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9276455389825922, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9332042556059987, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.960443725711421, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9664994760563499, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9731218218803406, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.983073259654798, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929621329433039, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6378255894309597, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6452290635359914, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7025903902555767, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7201181712903475, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.815267098577399, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8296247783460116, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8544041106575414, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9049057207609478, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9132752669484991, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9063281761972528, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9177324960106298, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9525026428072076, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9590865561836645, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9745989285017315, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9758937060832977, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.980740592667931, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932764109812284, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.28.parallel_decoder": { "attn": [ { "accuracy": 0.6601598639237254, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6704858227779991, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7175351945977462, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7806221560428017, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8279071105153937, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8314407750179893, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9013670055489791, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9054008345854909, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9117913246154785, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9161316281870792, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9141997600856581, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9197501132362768, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9288148503554494, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9344450862784135, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9611018833361173, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.967080903680701, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.974201152199193, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9836810795884383, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933010582861147, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6341227732206646, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6416310260170384, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.698328595412405, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7157564414174933, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8132538042570415, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8276831225345009, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8523572118658769, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9038673953006142, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9122810865703382, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9053321072929784, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9167896822879189, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9520120087422823, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9586321993878013, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9743590825482419, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9756457633093784, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9804803117325431, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932191803267127, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.29.parallel_decoder": { "attn": [ { "accuracy": 0.6624486320897153, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6732440245778937, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7240437206469084, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7838884654798006, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8273072368220279, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.831760983718069, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9019086172706202, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9058730853231329, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.913062145835475, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9166649329034906, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.914619433252435, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.920014312392787, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.931220531463623, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9365306653474507, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9626204120485407, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9682667976931522, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9747364709251806, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9844932822804702, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933966186485792, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6340248208296926, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6416352422613847, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6966050800524259, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7136112012361225, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8129995245682566, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.827541100351434, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8514412202333149, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9036622988550287, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9121125685541254, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9052180741962633, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.916718746486463, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9519617243816978, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9585982623853182, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9742959282900158, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.975618977295725, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9802905697571603, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993173137307167, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.30.parallel_decoder": { "attn": [ { "accuracy": 0.6455110750700298, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6566194233141447, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7014899253845215, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7681720131321957, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8155109505904348, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8194194341960707, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.895015465585809, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.8990300015399331, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9064589174170243, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.91019228257631, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9080689831783897, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9138205741581163, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9252091207002339, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.930611108478747, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9592468393476385, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9653823375701904, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9727167643998799, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9830431436237536, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929520817179429, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.632194017109118, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.639927813881322, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6937200395684493, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7102283678556743, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8118089500226473, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8265044061761153, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8498414943092748, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9030653426521703, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9115568713137978, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9045684274874235, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9161818027496338, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.951616384481129, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9583127247659784, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9740908271388004, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9754540057558763, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9799974278399819, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9931717075799641, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.31.parallel_decoder": { "attn": [ { "accuracy": 0.6513402085555227, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6623291467365466, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7088272446080258, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7737836837768555, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8191090383027729, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8239386960079795, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8969887620524356, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9009009599685669, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9081497317866275, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9119311696604678, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9100891602666754, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.915382096641942, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9270792697605333, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9327101331008107, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9605067999739396, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9663721323013306, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.973247860607348, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9833863267773076, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9931166587691558, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6278312833685624, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6357831954956055, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6887061219466359, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7050587754500539, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8096326275875694, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8244704949228387, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8474326008244565, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9019284122868588, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9105085761923539, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9034957760258725, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9151947247354608, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9510849431941384, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9578358813336021, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9738144200099143, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9751645169760051, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9796201131845775, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993041109881903, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.32.parallel_decoder": { "attn": [ { "accuracy": 0.670259902351781, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6820098475406045, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7223805879291736, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7818305868851512, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8309905779989142, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8338006421139366, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9037879454462152, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9059439458345112, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9127544101915861, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9164710484052959, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9156176981173063, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9206370679955733, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9303586483001709, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9360793577997308, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9626006640886006, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9680890974245573, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9750869838814986, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9840364471862191, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935402932919954, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6246710074575323, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6327140958685624, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6849774310463352, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7010008912337453, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.807690469842208, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8227864315635279, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8454042359402305, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9010104568381059, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9095969827551591, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9024140144649305, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.914334774017334, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9505100156131544, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9573893358832911, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9734640090089095, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9748956984595248, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.979252068619979, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929956597717184, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.33.parallel_decoder": { "attn": [ { "accuracy": 0.6757471686915347, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.6856067808050859, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7260787361546567, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7890171753732782, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8367692922291002, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8393787459323281, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9024538366418136, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9049526453018188, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9122535743211445, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9161756289632697, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.918963677004764, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.924056887626648, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9303475806587621, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9358337678407368, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9624744057655334, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9680717179649755, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9748443521951374, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9841856611402411, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935235490924433, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6267732068112022, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6348904308519865, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.685816815024928, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7015722676327354, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8088211260343853, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8237402313633969, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8457718773892051, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9014676621085719, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9100029844986766, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9030897617340088, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9147988997007671, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9509094702570062, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9576543695048282, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9737033781252409, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9750768322693674, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9793538953128614, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9930383374816493, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.34.parallel_decoder": { "attn": [ { "accuracy": 0.6957549546894275, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7039022194711786, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7387746007818925, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7982970664375707, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8462810014423571, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8484864485891241, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9090153669056139, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.911377235462791, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9181617247430902, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9216657061325877, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9233963552274203, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9284332049520392, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9341692422565661, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9391633460396215, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9645235130661413, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9697249406262448, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9765306350431944, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9849384270216289, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993920164672952, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6452456273530659, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6529500860916941, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6999255481519198, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7147729522303531, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8182187331350226, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8324507286674098, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8528746805692974, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9059687852859497, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9141580117376227, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9077995764581781, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9189794377276772, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9532379319793299, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9596574306488037, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9747572149101057, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9762875500478243, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9802387121476626, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932299946483812, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.35.parallel_decoder": { "attn": [ { "accuracy": 0.7055354620281018, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7159720220063862, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.749621265812924, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8063493904314543, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8516018641622443, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8532422718248869, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9119244688435605, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.913212305621097, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9202291840001157, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9239591297350431, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.925609638816432, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9306499393362748, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9358511786711843, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9413472790467112, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9655815676638955, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9705459318662945, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9769535660743713, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9852463257940192, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940117228972284, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.661155248943128, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.6687020753559314, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7124791647258557, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7269921553762335, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8262998179385537, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8399210227163214, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8591125262411017, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9096213453694394, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9175833526410555, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9118611059690777, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9225869304255435, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9553088232090599, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9614738602387278, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9758367679621044, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9773280651945817, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9810766264011985, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934815235043827, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.36.parallel_decoder": { "attn": [ { "accuracy": 0.7017382069637901, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.710041598269814, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7451053167644299, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8054196583597284, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8491272926330566, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8510639792994449, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9116034005817614, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9137208022569355, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9205119860799689, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9243495464324951, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9240820972543013, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9295322142149273, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9348176027599134, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9406312955053229, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9648931089200472, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9703809932658547, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9764754285937861, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9853132429875826, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9938466329323618, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.6981470459385922, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7045754131517912, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7434030833997225, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7569661642375746, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8450068423622533, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8570774103465834, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.87442946434021, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9187828427866885, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9261189950139899, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9213264866879112, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9308789215589824, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9600995873150072, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9655852976598238, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9783193484732979, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9797895346817217, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9832542773924375, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994099346822814, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.37.parallel_decoder": { "attn": [ { "accuracy": 0.7433864944859555, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7513065338134766, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7828438658463328, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8344265159807707, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8710914160075941, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.872856014653256, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9262751278124357, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9281772563332006, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9330485369029798, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.936138579719945, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9356434345245361, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9395827996103387, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9455923845893458, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9493898121934188, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9705464024292795, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9747914217020336, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9808271056727359, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9874497299131594, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9950515321995083, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.7416823788693077, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7471307202389366, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7805550223902652, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7929137882433439, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8667701294547633, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8772701338717812, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8925584868380898, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9295256702523482, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9362058451301173, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9322948016618428, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9405987701917949, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.965605942826522, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9704043771091261, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9811368851285232, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9826169233573111, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9857778925644723, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9948123183689619, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.38.parallel_decoder": { "attn": [ { "accuracy": 0.7608285201223273, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.7718969144319233, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.7987103462219238, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8482777946873715, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8783379353974995, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8797702161889327, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.935034243684066, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9361700446982133, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9405840321591026, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9435161163932398, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9396130662215383, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9433082655856484, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9506815483695582, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9544244126269692, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9733293715276217, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9772790262573644, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9832434638550407, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9885685906598443, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.995626311553152, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.7763226910641319, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.7809223626789293, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8114168894918341, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8229559220765766, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8837557968340422, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.8933767268532201, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9074333592465049, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9379522549478632, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9442545991194875, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9406754907808805, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9481922137109857, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9697201377467105, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9740887315649736, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9831756463176325, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9846306600068745, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.987696857828843, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9951184493930716, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.layers.39.parallel_decoder": { "attn": [ { "accuracy": 0.8135826838643927, "total_bits": 569401344, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8215859563727128, "total_bits": 591945728, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.850021851690192, "total_bits": 647517184, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8941154354497006, "total_bits": 713050112, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9076914410842092, "total_bits": 844115968, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9089779414628681, "total_bits": 845691904, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9473124177832353, "total_bits": 1083191296, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9484796994610837, "total_bits": 1085291520, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9520582148903295, "total_bits": 1095792640, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9542642110272458, "total_bits": 1108393984, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9539915323257446, "total_bits": 1114127360, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.95688228544436, "total_bits": 1123055616, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9624417486943697, "total_bits": 1179148288, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9653303403603404, "total_bits": 1195950080, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9796802213317469, "total_bits": 1440755712, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9826758111778059, "total_bits": 1464385536, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9864633851929715, "total_bits": 1620062208, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.990984791203549, "total_bits": 1779482624, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9964643936408194, "total_bits": 2156933120, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "mlp": [ { "accuracy": 0.8540777783644826, "total_bits": 1238922592, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8572109749442652, "total_bits": 1283618144, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8796408803839433, "total_bits": 1433121792, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8875166240491366, "total_bits": 1608233984, "gate_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "down_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9248019456863403, "total_bits": 1812774416, "gate_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9318139490328337, "total_bits": 1862132736, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9410648847881117, "total_bits": 2002191888, "gate_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9599670767784119, "total_bits": 2289099280, "gate_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.963995055148476, "total_bits": 2322850816, "gate_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9614385931115401, "total_bits": 2356985360, "gate_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9665874368266055, "total_bits": 2406343680, "gate_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9800983134068941, "total_bits": 2901196304, "gate_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9831630650319552, "total_bits": 2950554624, "gate_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9888232107225218, "total_bits": 3360210448, "gate_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.989771652378534, "total_bits": 3477813504, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9919696199266534, "total_bits": 3790289152, "gate_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966016737254042, "total_bits": 4447746304, "gate_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "up_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "down_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ] }, "model.norm.norm": null, "lm_head.linear": null }, "last_module_idx": 42 }