diff --git "a/.ipynb_checkpoints/job_new-checkpoint.json" "b/.ipynb_checkpoints/job_new-checkpoint.json" deleted file mode 100644--- "a/.ipynb_checkpoints/job_new-checkpoint.json" +++ /dev/null @@ -1,19531 +0,0 @@ -{ - "in_dir": "models/InfinityRP-v1-7B", - "out_dir": "quant", - "cal_dataset": null, - "bits": 4.25, - "dataset_rows": 100, - "measurement_rows": 16, - "length": 2048, - "measurement_length": 2048, - "head_bits": 6, - "shard_size": 8192, - "compile_full": null, - "rope_scale": null, - "rope_alpha": null, - "output_measurement": null, - "progress": "measure_quant", - "cal_filename": "quant/cal_data.safetensors", - "last_module_idx": 20, - "measurement": { - "model.layers.0.self_attn": [ - { - "accuracy": 0.877558160769312, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8920840887646926, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9081911197618434, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9363968258625582, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9365445595038564, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.942653194854134, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9455481732362195, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9525189640882769, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9641569038166812, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9656057735218814, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9695918213968214, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.975748550636988, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9716661383250826, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782651886343956, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828798611109194, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891270359144791, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984513087737325, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938357384434264, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958357678138112, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.0.mlp": [ - { - "accuracy": 0.8928122971402972, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8958215960546544, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9119100676555383, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9173344956024697, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9565043312154318, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9596801524687755, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9683800632820317, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770933203889351, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795892483701831, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978375457325264, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810202146733278, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889320632638899, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904485003238446, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938132646983784, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943266064173689, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962660342479419, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981004675960561, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.self_attn": [ - { - "accuracy": 0.8811218664050102, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8892381708872945, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9087876677513123, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9326447442566094, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9343533541419005, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9389589319103643, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9434352113228095, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9488306794511645, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9615478613658955, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9627741326234842, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9681485275688925, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9749660843885258, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9698018612045991, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977173926406785, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818536368639845, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987675760823645, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836330727527016, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945132814004625, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959313869231233, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.mlp": [ - { - "accuracy": 0.9588492322517068, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9616163160749956, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9627159019736082, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9631071067170093, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910702999423895, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932522132600609, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942449150533464, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956393962309352, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962568259614176, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961303170972602, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996675437242773, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997688008628565, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979422566646367, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981770028243773, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982726750803474, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983845692997995, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984882608527856, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.self_attn": [ - { - "accuracy": 0.9908251211929479, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991408574309102, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992458859771049, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993675801075848, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994788271203441, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948373522830048, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965591282387706, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967384456969356, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969066057697331, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970044491683917, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970905605740356, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973052069820513, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979640476613942, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981788659612281, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988910459107597, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999103294685483, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990608613405033, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995898298324104, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997573641008594, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.mlp": [ - { - "accuracy": 0.9851439709922201, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855616071727127, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879577658384254, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988712002903125, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925517222084301, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931383278514994, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941876132384335, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996127000531966, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964865507790819, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962121661825988, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966825906058626, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980703299439292, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983479472177447, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989574960843464, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990161258748756, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992262515106153, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997241090463113, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.self_attn": [ - { - "accuracy": 0.9877902277818832, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884877424123452, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903295453098652, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927693270575745, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933333755286998, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936398833034266, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948053595389387, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952120589656069, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957622516973826, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958903388009945, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967542606938974, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970506461410734, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971115854323694, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974128368378029, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983189021923432, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986627422469226, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985538446972465, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994418481816721, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996322107387976, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.mlp": [ - { - "accuracy": 0.9796435982479077, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9801889928793045, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833071332326845, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842834498390163, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896790547615015, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990501946132434, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918616717240136, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946439103897367, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951397506152525, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947450450731834, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954027841112724, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973223210820977, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977110353739638, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985551977082842, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986369029900647, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989059040166658, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996198801898956, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.self_attn": [ - { - "accuracy": 0.9869874521697822, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876129854911644, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897810035189124, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919724329252189, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993029248422796, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933637843687871, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945568198117575, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949658676663315, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954036812690136, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955421193061691, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965360805906943, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967992044433853, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969644241561917, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972151707134846, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983551780399131, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986104364706032, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986048399020403, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993996110447609, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996264486820253, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.mlp": [ - { - "accuracy": 0.9749599839070517, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.975692595820874, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796997007766837, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809080750534409, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873492685024088, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883827685231441, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900695283554102, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99340340492046, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99401205976028, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935563094117433, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943727555770525, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967176069131758, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971956435382661, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982133843157882, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998327449103511, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986691904977258, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999526621536413, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.self_attn": [ - { - "accuracy": 0.9835827787357726, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843828327659714, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867943229006702, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897796201617702, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991518017431525, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918365657427594, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937889119977817, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942788702411283, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948586996853057, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950644243876204, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957662717941659, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962210886756351, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964050972209263, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967787685424188, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980200059148238, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984140379452392, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983844864427259, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992390213868164, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995779650841876, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.mlp": [ - { - "accuracy": 0.9705221966203106, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9713911479829174, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760656639639484, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9774791552489134, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851487972283441, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986337611549779, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883030736451283, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922504967544228, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929503599966043, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924508150617936, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933911441384178, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961627697032925, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967119488191154, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979095270942995, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980500986291047, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984451229040707, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994426539411315, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.self_attn": [ - { - "accuracy": 0.9822573724055761, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832440125265796, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855538449299178, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9893292994483521, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908395661531311, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910607869540783, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933926632725879, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936333832110426, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940586816029329, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946294622618312, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955138057029169, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958833956931669, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961023020165923, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964041988438878, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978096634486543, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982008246374071, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982695582439192, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990905655845141, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995488510245004, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.mlp": [ - { - "accuracy": 0.9673304540154181, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9682946840889359, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736931312731222, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9753207194765932, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835306486163876, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984885055596303, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871333942895657, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913822554394995, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921747002579075, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916181073671109, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926825087076347, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957333708145214, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963542855173153, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976679322273029, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978314867598543, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982968002012441, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993776031317937, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.self_attn": [ - { - "accuracy": 0.9786935243755579, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979648781940341, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819586639733691, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864131048517791, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889826933412176, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892953702395684, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921447649892223, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926274995760698, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932040000138315, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936122741109055, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946200998843109, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949663660035616, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952348157653192, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956850734122685, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997417845603961, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978503075391544, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980264655467564, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987998667255804, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994756118961193, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.mlp": [ - { - "accuracy": 0.9641545602169476, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9652655021238484, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9709185713804082, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726241597121483, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819826031544883, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834710895574015, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985805564903115, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905818957516825, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914401042226114, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908195180867455, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919892764699302, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953217221007339, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960090658737739, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997440814726839, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976167267917557, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980993195969955, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993198641505411, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.self_attn": [ - { - "accuracy": 0.9767419997798769, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777465877368262, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9800447621872943, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852218260547441, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987886099022274, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885461548784453, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915046460534397, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923844687199515, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927950824218753, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932025633874888, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942094030986098, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946533522421592, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994969437419633, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995307668296032, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970474801283624, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976633667186099, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977324636297693, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987777411879506, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994243624647712, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.mlp": [ - { - "accuracy": 0.9616129095421025, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9627800372086073, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9688033140509537, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9706333723213327, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806940916337465, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.982275559517898, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847867421707824, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899120585453746, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908251865815959, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901836471092936, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914141281900045, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994999534229895, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957178560036578, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972557560173109, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974554669246763, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979735615503925, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992590449768805, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.self_attn": [ - { - "accuracy": 0.9747775378882101, - "total_bits": 89665536, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757302311788264, - "total_bits": 92221440, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977968896396066, - "total_bits": 95758848, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828858211587527, - "total_bits": 112272384, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872685813737151, - "total_bits": 132913152, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875881420448422, - "total_bits": 132980224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912961929742443, - "total_bits": 169613312, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916281711163097, - "total_bits": 169745920, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925874732067123, - "total_bits": 171195392, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931182578132537, - "total_bits": 173563904, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938500828575343, - "total_bits": 174923264, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943280449186108, - "total_bits": 175750144, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946892765032029, - "total_bits": 179253248, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950765724489955, - "total_bits": 181592064, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970571153588887, - "total_bits": 220469248, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975328947919863, - "total_bits": 223535104, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99787683802424, - "total_bits": 253499392, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986511852292, - "total_bits": 265838592, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994322786515113, - "total_bits": 337385472, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.mlp": [ - { - "accuracy": 0.9590275389583487, - "total_bits": 395461696, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9602844807270327, - "total_bits": 409224256, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9667213121429086, - "total_bits": 457272320, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9686913558919179, - "total_bits": 512977920, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9793533943593502, - "total_bits": 578397280, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810565348322454, - "total_bits": 592872448, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837822189652606, - "total_bits": 637454432, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891924407627237, - "total_bits": 728741472, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901648673081869, - "total_bits": 739476480, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894799383994388, - "total_bits": 751543392, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908145699699066, - "total_bits": 766018560, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946373007750433, - "total_bits": 924689504, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954212989633608, - "total_bits": 939164672, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970581967101776, - "total_bits": 1069524064, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972712785177129, - "total_bits": 1110384896, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978221578670567, - "total_bits": 1209999616, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992097707888062, - "total_bits": 1415520512, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ] - } -} \ No newline at end of file