diff --git "a/measurement-Mistral-ClaudeLimaRP-v3-7B.json" "b/measurement-Mistral-ClaudeLimaRP-v3-7B.json" new file mode 100644--- /dev/null +++ "b/measurement-Mistral-ClaudeLimaRP-v3-7B.json" @@ -0,0 +1,103538 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.012768719345331192, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.01112768892198801, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0055162981152534485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.005821545142680407, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.0058214073069393635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.002386680571362376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.012554015964269638, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.011047772131860256, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.006019419524818659, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.005376091692596674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.005630470346659422, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.00585910864174366, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.005374829284846783, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0032014730386435986, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.002461375668644905, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.003143508452922106, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.0022385704796761274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.0019184330012649298, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.002184594050049782, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0018690497381612659, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.002130853710696101, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.002184493001550436, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0017415289767086506, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0018491630908101797, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.012768719345331192, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.012768719345331192, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.012981564737856388, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.011346082203090191, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.00548484968021512, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.005778078455477953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0057778204791247845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.00216547935269773, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.013130046427249908, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.011247038841247559, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.0059669530019164085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0052896831184625626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.005525203887373209, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.005805583670735359, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.005287250969558954, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0030482045840471983, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.002197355730459094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.002999273594468832, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0019249966135248542, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0014967001043260098, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0018554740818217397, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.001428110059350729, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0018431446515023708, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0018553610425442457, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0013316862750798464, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00139934744220227, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.012981564737856388, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.012981564737856388, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.12383346259593964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07524830102920532, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04520129784941673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.05346905440092087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.05342333763837814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.024971095845103264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.08639123290777206, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06986746937036514, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.058456845581531525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.033863697201013565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.041386205703020096, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04728761315345764, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.033575840294361115, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.025964735075831413, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.023839395493268967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02399805560708046, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.013798730447888374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011801020242273808, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010140817612409592, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008454792201519012, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012312826700508595, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.010092426091432571, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0077415816485881805, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006660333834588528, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.12383346259593964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.12383346259593964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11862926930189133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08146455883979797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.055083755403757095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.05426502600312233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.051918454468250275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.02858201414346695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.08007155358791351, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07109911739826202, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.056384507566690445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03556215763092041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03825845569372177, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04104210063815117, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.034468211233615875, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.025977440178394318, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02359953336417675, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.020777001976966858, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.01513257808983326, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013471542857587337, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012528750114142895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011022322811186314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.011623015627264977, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.012381041422486305, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008988787420094013, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009744681417942047, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11862926930189133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11862926930189133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.135872945189476, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12526541948318481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12190138548612595, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11027750372886658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06184324622154236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.05871524289250374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0692337229847908, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06390572339296341, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06255833059549332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.055161893367767334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.052677690982818604, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.035231318324804306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.030721908435225487, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.02984655648469925, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.029649857431650162, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01773182861506939, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01608896441757679, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01589049957692623, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014872741885483265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.014749609865248203, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.00989251583814621, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010767032392323017, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009621851146221161, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008326159790158272, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12526541948318481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12526541948318481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1640801727771759, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15350447595119476, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1501566767692566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13581213355064392, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07518342137336731, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07205937057733536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08335760980844498, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07707339525222778, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07583342492580414, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06759627163410187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0643293708562851, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04211828112602234, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03659846633672714, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03578094393014908, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03559279441833496, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021008841693401337, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01836494728922844, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01816115900874138, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016872897744178772, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.016752837225794792, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01100622583180666, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011011239141225815, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010741183534264565, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00726321479305625, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08335760980844498, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08335760980844498, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.10565554350614548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.0898577943444252, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.08036485314369202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.07013608515262604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.046892620623111725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.03988538682460785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.06531263887882233, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.05627816542983055, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.048823028802871704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.03844413161277771, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0370175763964653, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.031648047268390656, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.02691705711185932, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.023425495252013206, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.02257055789232254, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.016632962971925735, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.013811114244163036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.013433062471449375, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.012443696148693562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.011971816420555115, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01057062391191721, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.010808276943862438, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.009703144431114197, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.009113401174545288, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.10565554350614548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.10565554350614548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.021786313503980637, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.014975975267589092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.00834568776190281, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.009185505099594593, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.008966997265815735, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0038961488753557205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.01576116308569908, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.014248250983655453, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.010217860341072083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.006923086941242218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.007442130707204342, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.007981319911777973, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.006806170102208853, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.004496296867728233, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0037669201847165823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0040135434828698635, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.0026019285432994366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.002151251770555973, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.002264180686324835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0018226115498691797, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00218828022480011, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0022462529595941305, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0014599525602534413, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0016515502939000726, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.021786313503980637, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.021786313503980637, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.01916961744427681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.013530874624848366, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.007230005692690611, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.007960768416523933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.00779873039573431, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.003142276545986533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.014670627191662788, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.013018609955906868, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.008941681124269962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0062543898820877075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.006803237833082676, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.007323043886572123, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.006171438377350569, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.003886571153998375, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.0031315574888139963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.003676199121400714, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0022125295363366604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0017620079452171922, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0019511242862790823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0014892269391566515, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0019498446490615606, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0019379695877432823, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0011729757534340024, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.001350697479210794, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.01916961744427681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.01916961744427681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.15687517821788788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10400701314210892, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07290144264698029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07262558490037918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.06739191710948944, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03861146792769432, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.097943015396595, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08870577812194824, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.07481810450553894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04563385620713234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.04750165343284607, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.050003502517938614, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04236069694161415, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.03277769312262535, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.030111752450466156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02502056583762169, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.017408013343811035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.015117721632122993, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0133106904104352, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.011289299465715885, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012949878349900246, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012743684463202953, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.00950990617275238, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008382339961826801, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10400701314210892, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10400701314210892, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1790170967578888, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.14394763112068176, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1278935670852661, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.10669150948524475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.08232980221509933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.06752030551433563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1068425178527832, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09441052377223969, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.0853804424405098, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06028717756271362, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.05752932280302048, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05496952310204506, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04602859914302826, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0406661257147789, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.03932978957891464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027804609388113022, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.022773316130042076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.021698106080293655, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.018921857699751854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01806304045021534, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015569519251585007, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01674978993833065, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.013820251449942589, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013331894762814045, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.10669150948524475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.10669150948524475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.18451325595378876, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17299166321754456, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16926170885562897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.15245454013347626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08628401160240173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08270719647407532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09573747217655182, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08824022114276886, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08701872080564499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07714376598596573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0734286904335022, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04916203022003174, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04285798221826553, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04202613607048988, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04183635488152504, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02484210394322872, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.023014822974801064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02280653640627861, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021339183673262596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02123679406940937, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014134848490357399, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01573154330253601, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.013884641230106354, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012574827298521996, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09573747217655182, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09573747217655182, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23155634105205536, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2178441435098648, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21359190344810486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19282107055187225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10871525853872299, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10444149374961853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1200852170586586, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1107989102602005, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10952591150999069, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09739015251398087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09268763661384583, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.061383895576000214, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.053404226899147034, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05250002443790436, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05228406935930252, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030740030109882355, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02799149416387081, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.027744144201278687, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025854067876935005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025722824037075043, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016774022951722145, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018215101212263107, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01648871600627899, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013766678050160408, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1200852170586586, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1200852170586586, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.018183507025241852, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.017725979909300804, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.007559920195490122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.007097586989402771, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.00635826401412487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0034133975859731436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.018401579931378365, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.017130883410573006, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.0064324187114834785, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.006063186563551426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.006170604843646288, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.006120872683823109, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.005796879529953003, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.0029868092387914658, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.0026346726808696985, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0028227742295712233, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.0023314093705266714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0010419212048873305, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0022909916006028652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.0009461079607717693, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0022823999170213938, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.0022544702515006065, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.0007594806374981999, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0007967101410031319, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.018183507025241852, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.018183507025241852, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06450264155864716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.050163038074970245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.04163455218076706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.03759708255529404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.028430910781025887, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.021073002368211746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.03972312808036804, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03612586110830307, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.030437109991908073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.021806901320815086, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.021606191992759705, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.020128337666392326, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.017235802486538887, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.013799764215946198, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.012872113846242428, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010077111423015594, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.007332581095397472, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.006645409390330315, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.006071486975997686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0054264431819319725, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0052525969222188, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005268082022666931, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.004056969657540321, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0035430719144642353, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06450264155864716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06450264155864716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06830711662769318, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05103332921862602, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.039746277034282684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.036859143525362015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.029507210478186607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.020118093118071556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04322650283575058, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.039307691156864166, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.032039813697338104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.022265495732426643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.022423774003982544, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.021839139983057976, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01872904784977436, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.014358846470713615, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.013131935149431229, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01091733481734991, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.007672629319131374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.006783255375921726, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006304227747023106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.005445263814181089, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005692093167454004, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005693350452929735, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.004214029759168625, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0037963446229696274, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06830711662769318, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06830711662769318, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.19482602179050446, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.15997496247291565, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.14532184600830078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1280706524848938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08895152062177658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.07388681173324585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11086051166057587, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10030480474233627, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09281747043132782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06915438175201416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.06614166498184204, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05642342567443848, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04798523709177971, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04279625415802002, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.041499510407447815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028262203559279442, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.022081559523940086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.020866241306066513, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018106738105416298, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01716288924217224, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014766956679522991, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014153210446238518, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.012883393093943596, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009011836722493172, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11086051166057587, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11086051166057587, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.18367870151996613, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16259855031967163, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15244761109352112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.13053347170352936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.08566276729106903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.07644450664520264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10480884462594986, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09514331072568893, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.08771582692861557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07075001299381256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06593027710914612, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05433715134859085, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04637042433023453, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.042201701551675797, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04117470979690552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027709001675248146, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.023442842066287994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02268076315522194, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020764730870723724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02013109251856804, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015824265778064728, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0168832428753376, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014550196006894112, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013415520079433918, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10480884462594986, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10480884462594986, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23579281568527222, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22044426202774048, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2155829519033432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19361203908920288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11150623857975006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.1063045784831047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1238962858915329, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11419859528541565, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11254561692476273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0986454114317894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09330061078071594, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06337679177522659, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05487062782049179, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05366293340921402, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05338437110185623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031729806214571, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.028067762032151222, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.027745526283979416, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02552485652267933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025355013087391853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017062345519661903, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017574623227119446, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01668497920036316, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012500587850809097, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1238962858915329, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1238962858915329, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2726430892944336, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2552716135978699, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24971996247768402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2243814766407013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12881183624267578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12297821789979935, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14259383082389832, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13175849616527557, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12996484339237213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11409947276115417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10786810517311096, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07265256345272064, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06301411986351013, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.061711184680461884, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06141090393066406, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03628706559538841, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.031661346554756165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.031296584755182266, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.028671976178884506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02846847102046013, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01903270184993744, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01892438344657421, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.018604613840579987, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01241157203912735, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11409947276115417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11409947276115417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.21623803675174713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18879631161689758, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17819969356060028, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.15430578589439392, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.0981031283736229, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08772089332342148, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11651387065649033, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10637161880731583, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10119381546974182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08100376278162003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07570599764585495, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05925498902797699, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05094756931066513, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04726313427090645, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04636984318494797, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02974749729037285, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.025017712265253067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02459060586988926, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021756960079073906, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.021174153313040733, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016147419810295105, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016498837620019913, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014937800355255604, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011897490359842777, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11651387065649033, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11651387065649033, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.04940517991781235, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.04093748703598976, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.03563441336154938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0315464586019516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.02231588400900364, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.01797555945813656, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.03037722408771515, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.027568073943257332, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.02339934930205345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.017836159095168114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.01737893745303154, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.015419499017298222, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.013168956153094769, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.010837340727448463, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.010222939774394035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.007729867938905954, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.0057805730029940605, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.00534618366509676, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0049256025813519955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0045179640874266624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.004098891746252775, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.004105977714061737, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0032959284726530313, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0028408293146640062, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.04940517991781235, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.04940517991781235, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.05066164955496788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.040928252041339874, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0340721420943737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.03034328855574131, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.022469691932201385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.01703883521258831, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03228328004479408, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.029231244698166847, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.023794636130332947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.017772015184164047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.017634378746151924, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01634836755692959, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01393044088035822, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.010870485566556454, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.010027146898210049, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.008170788176357746, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.005718426313251257, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.005131690762937069, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.004805774427950382, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0042241658084094524, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004227224737405777, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004107318818569183, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.003126629162579775, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.002622617408633232, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.05066164955496788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.05066164955496788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.20340901613235474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17076267302036285, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.15674203634262085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.13743740320205688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09322693943977356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.07943347096443176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11420804262161255, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10468614101409912, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09681228548288345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07383206486701965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07026661187410355, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05806951969861984, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.049948133528232574, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.044747333973646164, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04344324767589569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028984809294342995, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.022970406338572502, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02180500142276287, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019034938886761665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.018109610304236412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014926196075975895, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014488746412098408, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013064343482255936, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008988269604742527, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11420804262161255, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11420804262161255, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1961187869310379, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1741616576910019, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1655162125825882, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1384093165397644, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09190492331981659, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08342259377241135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1081409603357315, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09804423153400421, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09344057738780975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07405053079128265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0676773339509964, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05569293349981308, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047674015164375305, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04486772418022156, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0442020520567894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028093699365854263, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.024459410458803177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023866692557930946, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021183786913752556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020764827728271484, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01564003899693489, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016860293224453926, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01476210169494152, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01313770841807127, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1081409603357315, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1081409603357315, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23874065279960632, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22298714518547058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21791915595531464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19597207009792328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11299341171979904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10760601609945297, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12549173831939697, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11587508022785187, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11407861858606339, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09993217140436172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09464308619499207, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06411746144294739, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.055579543113708496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.054297562688589096, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05399785935878754, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032072246074676514, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.028192216530442238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.027848316356539726, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025583436712622643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025393018499016762, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01707221008837223, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017372367903590202, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01664821058511734, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012008433230221272, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11587508022785187, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11587508022785187, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.277749240398407, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.25976794958114624, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.25405266880989075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.22874610126018524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.1315947026014328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.125495046377182, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14613382518291473, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13478459417819977, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.13285022974014282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11654583364725113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.11047197133302689, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07460210472345352, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.064554862678051, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.063146211206913, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06279807537794113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03732726722955704, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03247722610831261, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.03209023177623749, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029429921880364418, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02921639196574688, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01978667452931404, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019561516121029854, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.019321542233228683, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01299549825489521, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11654583364725113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11654583364725113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.23848874866962433, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21025030314922333, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19967082142829895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17398634552955627, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.1089569479227066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0984300896525383, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1280408799648285, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11711052805185318, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11213704943656921, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0908191129565239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0851602554321289, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06508477032184601, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.056059207767248154, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.052396662533283234, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.051513317972421646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03268369287252426, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.027502816170454025, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027080077677965164, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023989902809262276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.02341409958899021, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01762252487242222, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017769111320376396, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016414333134889603, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012514875270426273, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11711052805185318, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11711052805185318, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06646668910980225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.05576949939131737, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.04885363578796387, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0435047410428524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.030214712023735046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.024563981220126152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04127189517021179, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03718678653240204, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.031572915613651276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02448403276503086, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.02390379272401333, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.021022483706474304, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.017849577590823174, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.014716793783009052, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.013891500420868397, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010554371401667595, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.007933611050248146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.0073618656024336815, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.006858900189399719, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.006325473543256521, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005638386122882366, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0057175434194505215, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.004549416713416576, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0040726130828261375, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06646668910980225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06646668910980225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06598538160324097, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05402114614844322, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04600747674703598, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.04117318242788315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.029456935822963715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.022998735308647156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.041781939566135406, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03744224086403847, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.031241795048117638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02364628203213215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.023399285972118378, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02128133736550808, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.017951009795069695, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.014315358363091946, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.013329005800187588, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010686098597943783, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.007629414554685354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.006956357043236494, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00651563610881567, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.005862665828317404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00559983029961586, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005506230518221855, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.004289090167731047, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0037329953629523516, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06598538160324097, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06598538160324097, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21926243603229523, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18697506189346313, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17361193895339966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15284603834152222, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10091745853424072, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08757920563220978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.121996209025383, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11182140558958054, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10458499938249588, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08141621202230453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0771535336971283, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06203540787100792, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05350962653756142, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.048529691994190216, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.047284651547670364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.031046681106090546, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.025013960897922516, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.023923145607113838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021088259294629097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.020220881327986717, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01610833778977394, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015835553407669067, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.014312731102108955, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010126384906470776, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.121996209025383, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.121996209025383, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.19005708396434784, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16832026839256287, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15974710881710052, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.13655796647071838, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.0882454589009285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.07980582118034363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10559997707605362, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09510022401809692, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09009170532226562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07276751846075058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06728048622608185, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05437730252742767, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04631051793694496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.043236881494522095, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04251391440629959, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027540236711502075, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.023749738931655884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023116927593946457, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020903972908854485, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020444855093955994, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01542775146663189, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0166225116699934, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014436340890824795, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013067414052784443, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10559997707605362, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10559997707605362, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.21791429817676544, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2024659514427185, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19699349999427795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.17713536322116852, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10316397994756699, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09755916148424149, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1160316988825798, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10692935436964035, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10439226776361465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0907772034406662, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08606357872486115, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.059286367148160934, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.051321882754564285, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04964376240968704, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04924752563238144, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02972358465194702, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.025844965130090714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.025458116084337234, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023372288793325424, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.023120760917663574, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015881041064858437, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016087885946035385, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01531747542321682, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011172394268214703, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1160316988825798, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1160316988825798, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.27784988284111023, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2588328719139099, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.25249314308166504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.22723181545734406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.13171912729740143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12498921900987625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1472148597240448, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1358359307050705, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.13319621980190277, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11628333479166031, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.1101994663476944, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07507352530956268, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06508061289787292, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0632372498512268, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06280481815338135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03758195415139198, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.032548222690820694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.03209340199828148, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029401900246739388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.029123134911060333, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.019802968949079514, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019663162529468536, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.019176527857780457, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013006524182856083, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11628333479166031, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11628333479166031, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24664278328418732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22064147889614105, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.21111789345741272, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1851656436920166, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11339393258094788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10383254289627075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13250160217285156, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12062115967273712, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11623348295688629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09617242962121964, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09011325240135193, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.067410908639431, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05785983055830002, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05460098385810852, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.053806960582733154, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03387654945254326, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028741801157593727, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.028360864147543907, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02540137991309166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.024897385388612747, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01828857883810997, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018579404801130295, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01718265563249588, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013269792310893536, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12062115967273712, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12062115967273712, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07937280088663101, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06858636438846588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.061834972351789474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.054865725338459015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.036348987370729446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.03086904250085354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04791006073355675, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04343109205365181, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.03768644854426384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.030139988288283348, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.029111048206686974, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.024314915761351585, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.020758232101798058, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.017637217417359352, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0168183371424675, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01218761783093214, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.009365706704556942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.008799910545349121, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.008171290159225464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.007644640747457743, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006465287413448095, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006502642296254635, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0053701624274253845, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0045127272605896, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07937280088663101, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07937280088663101, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07505679130554199, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06374015659093857, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05609864369034767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.049703449010849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03394027799367905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.027859803289175034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.046140287071466446, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.041997529566287994, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03541461005806923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.027830136939883232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.02706787921488285, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.023343244567513466, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.019995475187897682, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.016383958980441093, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.015417806804180145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01167784072458744, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.008540668524801731, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.007866931147873402, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00733616016805172, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.006681140046566725, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0060385665856301785, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0058812666684389114, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.004755452740937471, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003734900848940015, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07505679130554199, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07505679130554199, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2348296046257019, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20660890638828278, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19585222005844116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1726771742105484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.1091310977935791, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0980541929602623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1282501071691513, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1176978051662445, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.1119207814335823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09057704359292984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08542594313621521, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06515076756477356, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.056201670318841934, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05232280120253563, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05137834697961807, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03250519186258316, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.026623474434018135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02575242705643177, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02277250401675701, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02211468666791916, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016712142154574394, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016083357855677605, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.015294160693883896, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009764964692294598, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1176978051662445, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1176978051662445, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2227574735879898, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20178137719631195, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1935092657804489, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16954797506332397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10403525084257126, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09602127224206924, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12087387591600418, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11055054515600204, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10574957728385925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08812972158193588, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08280431479215622, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06232864409685135, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05330696702003479, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.050283439457416534, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04956762120127678, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0313778817653656, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.026626989245414734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02600266970694065, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023548021912574768, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023091517388820648, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01714368909597397, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017452441155910492, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01610400155186653, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012707101181149483, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12087387591600418, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12087387591600418, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.21240472793579102, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.197524294257164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19227217137813568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.17281323671340942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10050816833972931, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09506699442863464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11285421252250671, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10414982587099075, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10167980194091797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08846364170312881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08388829231262207, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05759212747216225, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.049910418689250946, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.048257242888212204, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.047865916043519974, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.028811736032366753, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.024921579286456108, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.024539148434996605, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.022490844130516052, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02224133536219597, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015210911631584167, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01522599346935749, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.014650749042630196, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010220265947282314, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11285421252250671, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11285421252250671, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.275599867105484, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.25718021392822266, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.25098294019699097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2260923683643341, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.1306743323802948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12413014471530914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14599670469760895, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1347665935754776, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.13208700716495514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1155354306101799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10970151424407959, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0744616761803627, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06453389674425125, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0627010315656662, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.062270186841487885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.037233710289001465, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03218219056725502, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.03173864632844925, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02909149043262005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.028813106939196587, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01952739804983139, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01933034509420395, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.018909547477960587, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012622273527085781, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1155354306101799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1155354306101799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24651259183883667, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22298981249332428, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2141164243221283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18866993486881256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11386793106794357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10503517836332321, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13265831768512726, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12086464464664459, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11639987677335739, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09779942035675049, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09170603007078171, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06766628473997116, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05793381109833717, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.054761335253715515, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.053986821323633194, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03403552249073982, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028682224452495575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02829754538834095, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02554202266037464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.0250440314412117, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018402818590402603, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018348412588238716, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017321646213531494, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012887289747595787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12086464464664459, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12086464464664459, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07545870542526245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06484794616699219, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05730906501412392, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.05092088505625725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.03445415571331978, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.028535738587379456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.047302503138780594, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04265616834163666, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.03582535311579704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02854052372276783, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.02781977877020836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.024024635553359985, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02041330561041832, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.016758820042014122, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.01578642427921295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01204411406069994, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.008957142941653728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.008305447176098824, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007826785556972027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.00720476359128952, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00635042367503047, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006393487565219402, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.005040902644395828, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004444445483386517, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07545870542526245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07545870542526245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07159778475761414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06042886897921562, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.052128683775663376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.04642448201775551, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0322820283472538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.025809546932578087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04545526206493378, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.041174426674842834, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03376123681664467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.026436103507876396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0260063074529171, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0230562686920166, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.019634736701846123, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.015623078681528568, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01454688049852848, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011518583633005619, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.008241484872996807, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.007497480139136314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007105596363544464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0063831801526248455, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005996066611260176, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00586920278146863, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0045726243406534195, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0038318783044815063, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07159778475761414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07159778475761414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21121984720230103, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1862284392118454, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17525450885295868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15472787618637085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09820113331079483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08769487589597702, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11798304319381714, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10809165984392166, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10078517347574234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08179821074008942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07740984857082367, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05992744490504265, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05163702368736267, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0471620075404644, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.046060699969530106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029942821711301804, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.024166470393538475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02323855832219124, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020787283778190613, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02004089206457138, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015408923849463463, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015009729191660881, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013767097145318985, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00934314914047718, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11798304319381714, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11798304319381714, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.21234926581382751, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1899186670780182, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18108054995536804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.15750280022621155, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09922406077384949, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09008210152387619, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11711931973695755, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10605181008577347, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10107996314764023, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08281087875366211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07710500806570053, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0601922944188118, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.051262564957141876, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0481211356818676, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04736753925681114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030211448669433594, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.025655901059508324, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.024985207244753838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022520089522004128, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02203039638698101, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01634524203836918, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017066968604922295, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015318449586629868, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012646798975765705, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11711931973695755, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11711931973695755, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.19725966453552246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18373320996761322, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17873233556747437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.16107913851737976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.09337062388658524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08835305273532867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10520736873149872, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09707725048065186, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.09444786608219147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08247300982475281, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0782475471496582, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05368161201477051, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04654589667916298, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.044878195971250534, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.044480904936790466, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.026876721531152725, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.023246388882398605, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.022874830290675163, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021056069061160088, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.020803160965442657, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014204473234713078, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014330104924738407, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.013630099594593048, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009724300354719162, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10520736873149872, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10520736873149872, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2679159641265869, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2503170967102051, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24415136873722076, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2201511263847351, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12712237238883972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12068361043930054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14236849546432495, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1314294934272766, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12848223745822906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11259785294532776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10684695094823837, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07256998121738434, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06292843073606491, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.060984477400779724, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06052764505147934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.036292094737291336, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03130938112735748, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.030849309638142586, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02833852358162403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.028044329956173897, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01899717003107071, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01883605308830738, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01833866722881794, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012293302454054356, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11259785294532776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11259785294532776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24709098041057587, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22573475539684296, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2178356796503067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1940140724182129, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.1146673932671547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10677589476108551, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13306787610054016, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12081466615200043, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11700930446386337, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09994795173406601, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09456610679626465, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0677572712302208, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05796114727854729, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05516946315765381, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.0544993132352829, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03399457409977913, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028950713574886322, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.028625912964344025, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.026089243590831757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.0256658885627985, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01828577369451523, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01853257790207863, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017334584146738052, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013168255798518658, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12081466615200043, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12081466615200043, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08235334604978561, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07304643839597702, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.06458967924118042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.05737628415226936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.03803374618291855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.031845297664403915, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05335359647870064, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04794786125421524, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.039112865924835205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03238597512245178, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.031677111983299255, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0272210706025362, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02305280603468418, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.018523622304201126, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.017313668504357338, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013652952387928963, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.00999673455953598, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.009219540283083916, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.008931622840464115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.008166407234966755, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007226285524666309, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00730552664026618, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.00560694420710206, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005133424885571003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08235334604978561, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08235334604978561, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.074422687292099, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06520197540521622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05600529536604881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.049630362540483475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.033861566334962845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.027375008910894394, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.049097467213869095, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04459192231297493, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03495984897017479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.028675217181444168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.028191545978188515, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.024860095232725143, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02129642851650715, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.016429493203759193, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.015094785019755363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01249134074896574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.008719753473997116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.007861971855163574, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0077252634800970554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.006853082217276096, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006466968916356564, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006395652890205383, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0047330851666629314, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004201368894428015, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.074422687292099, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.074422687292099, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.22504809498786926, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20148645341396332, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19176536798477173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1692185252904892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10532021522521973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09565496444702148, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12448948621749878, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11354207247495651, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10752410441637039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08854973316192627, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08365917950868607, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06341206282377243, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05425814911723137, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05049435794353485, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.0495697520673275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03165755420923233, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.025827152654528618, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.025020495057106018, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022385403513908386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02176581881940365, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016347885131835938, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015792157500982285, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.014878579415380955, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009846074506640434, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12448948621749878, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12448948621749878, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.22086255252361298, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19976575672626495, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19124050438404083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16636130213737488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10331429541110992, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.094968281686306, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12289157509803772, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11027757078409195, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10509494692087173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08719862997531891, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08174953609704971, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06328919529914856, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.053564753383398056, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.050498418509960175, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.049763280898332596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03198442980647087, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02745935507118702, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02681450918316841, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024452069774270058, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0239882729947567, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017842957749962807, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018824785947799683, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016867730766534805, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014574328437447548, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12289157509803772, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12289157509803772, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.19028311967849731, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17762845754623413, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17310237884521484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.15610404312610626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.09024621546268463, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08560790121555328, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10127374529838562, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09339668601751328, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.09123805165290833, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07985100895166397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07582641392946243, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0517716184258461, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04483772814273834, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04341451823711395, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04307878017425537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025918077677488327, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.022565126419067383, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.022236747667193413, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020489702001214027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.020272983238101006, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013782539404928684, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01399162132292986, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01330479234457016, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009653939865529537, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10127374529838562, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10127374529838562, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.26792144775390625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2508153021335602, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24495600163936615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.22124284505844116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12737169861793518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12123026698827744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14250536262989044, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13123737275600433, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12869928777217865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11305072903633118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10748327523469925, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07290315628051758, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06298519670963287, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.06124339997768402, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06083174794912338, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0364995151758194, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03167441487312317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.031250130385160446, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.028770310804247856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.028507301583886147, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.019416568800807, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019364463165402412, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01882212795317173, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013114106841385365, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11305072903633118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11305072903633118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.23961028456687927, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21830561757087708, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2104252725839615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18757396936416626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11102302372455597, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10303041338920593, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12876461446285248, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11727995425462723, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11329200863838196, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09656230360269547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0915447548031807, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06569304317235947, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05620265007019043, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05337891727685928, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05270440876483917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03294669836759567, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.027927454560995102, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02758852392435074, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025094352662563324, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.024661727249622345, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01771550066769123, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017788156867027283, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01677853800356388, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012493195943534374, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11727995425462723, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11727995425462723, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08069509267807007, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0709567666053772, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0647379606962204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.05697203800082207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.0372086837887764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.03214942291378975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.047990087419748306, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04383327066898346, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.03825024887919426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.031018730252981186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.029732204973697662, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.024376526474952698, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.020938873291015625, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.017973467707633972, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.01721177063882351, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.012205944396555424, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.009454603306949139, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.008912794291973114, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.008245748467743397, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0077498131431639194, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006413809023797512, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00641964515671134, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.005387085489928722, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004342775791883469, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08069509267807007, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08069509267807007, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07346834242343903, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0632549300789833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05664234980940819, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.04964262247085571, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03334808722138405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.027990803122520447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.044199083000421524, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04030737653374672, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.034537818282842636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02739250473678112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.026403363794088364, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02242201939225197, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.019258636981248856, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.01610705628991127, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.015281368046998978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011232621036469936, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.008432688191533089, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.007860123179852962, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007267615292221308, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.006721780635416508, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005844707600772381, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005781013984233141, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.004736687988042831, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0038044096436351538, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07346834242343903, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07346834242343903, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21018798649311066, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1858985424041748, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17590828239917755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15418876707553864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09761574864387512, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08776918798685074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11693359911441803, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10597418993711472, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10003366321325302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08106354624032974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07648184895515442, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05958537384867668, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05070556700229645, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04692312330007553, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.045971713960170746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029786398634314537, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.024174408987164497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02334766648709774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02083776704967022, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02018943428993225, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015408172272145748, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015108906663954258, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013879403471946716, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009798599407076836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11693359911441803, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11693359911441803, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.216681569814682, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19740770757198334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1896120309829712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16571366786956787, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10144101828336716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0937836691737175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11903487145900726, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10803502053022385, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10310399532318115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08626454323530197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08120342344045639, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06117527559399605, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.051989126950502396, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04903312399983406, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04829634353518486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03062661737203598, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.025842970237135887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02524971216917038, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02287530153989792, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.022408297285437584, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016377244144678116, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016783079132437706, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015413308516144753, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012050347402691841, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11903487145900726, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11903487145900726, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.18373064696788788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17177869379520416, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16740617156028748, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.15128102898597717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08711200207471848, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0827186182141304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09797146916389465, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09026975184679031, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08806505799293518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07726152241230011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07348419725894928, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.050033386796712875, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04329897463321686, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.041881389915943146, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.041545573621988297, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025029651820659637, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02171500213444233, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.021394815295934677, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019728051498532295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01951933093369007, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013241390697658062, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013394812121987343, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012758691795170307, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0091489776968956, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09797146916389465, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09797146916389465, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2629384994506836, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24621804058551788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24044224619865417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.21727626025676727, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12497234344482422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11888521909713745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13989774882793427, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1288682371377945, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12623798847198486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11086434125900269, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10541597753763199, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07140617072582245, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06173107773065567, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.059984151273965836, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.059555452316999435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.035689759999513626, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03083307482302189, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.03041110374033451, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027955546975135803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02769271656870842, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018708378076553345, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018590819090604782, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.018105316907167435, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01224274467676878, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12497234344482422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12497234344482422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2385038435459137, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21821238100528717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.21091611683368683, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18832026422023773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11061029136180878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10315483808517456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12751850485801697, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11631780117750168, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11278488487005234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09666334837675095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09136287868022919, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06503050029277802, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.055789534002542496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05318884551525116, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05258290097117424, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.032670557498931885, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.027866706252098083, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027562951669096947, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025140324607491493, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.024751516059041023, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01766359992325306, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017752189189195633, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01680431328713894, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01254677027463913, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11631780117750168, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11631780117750168, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10178575664758682, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0899520218372345, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08247794955968857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0727803111076355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04703030735254288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.040959134697914124, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.060524992644786835, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05494869127869606, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.048268288373947144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03943290933966637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03781740739941597, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.030813848599791527, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.026366446167230606, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02280311845242977, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.021899152547121048, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01547225471585989, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012175245210528374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011540709063410759, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010730412788689137, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010153102688491344, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00829386617988348, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008441791869699955, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007052407134324312, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006005303934216499, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10178575664758682, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10178575664758682, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09136901795864105, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07967434078454971, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07200171798467636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06330122798681259, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04173455387353897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.035542335361242294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05493582785129547, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04985606670379639, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.043052103370428085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03467198461294174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.033422231674194336, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027939634397625923, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023847751319408417, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02013157308101654, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01916475221514702, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013961412943899632, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010529384016990662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009856688790023327, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0091446777805686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.00850672461092472, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007272840477526188, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007156872656196356, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005965569522231817, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004718931391835213, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09136901795864105, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09136901795864105, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21619045734405518, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19488143920898438, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1866987645626068, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.16386322677135468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10117349773645401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09290267527103424, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11739804595708847, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10753444582223892, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10298191010951996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0850781798362732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0799095556139946, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.059670113027095795, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05133354291319847, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.048459213227033615, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.047742582857608795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029775451868772507, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02463931404054165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.024001983925700188, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02131236158311367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02084924653172493, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015261929482221603, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014743253588676453, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0141672408208251, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008993718773126602, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11739804595708847, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11739804595708847, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2183239459991455, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19921492040157318, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19148758053779602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17001530528068542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.1022198498249054, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09459822624921799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11917398124933243, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10858359932899475, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10388383269309998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08748897165060043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08330777287483215, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06111004203557968, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05216183885931969, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04923129081726074, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04853249713778496, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03054177761077881, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.025629399344325066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.025047607719898224, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02269621752202511, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.022232798859477043, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016114776954054832, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016224481165409088, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015158365480601788, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011159990914165974, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11917398124933243, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11917398124933243, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.18237563967704773, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17014171183109283, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16564495861530304, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.14951208233833313, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08642315119504929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08188838511705399, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09737555682659149, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08962557464838028, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08737102895975113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07645438611507416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07280140370130539, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04981120303273201, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04307013005018234, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.041625943034887314, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04128279909491539, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024950647726655006, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.021754678338766098, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02143259160220623, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01977982185781002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.019565807655453682, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013333221897482872, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013683550991117954, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012847689911723137, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009644978679716587, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09737555682659149, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09737555682659149, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2555590271949768, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23866376280784607, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2328232377767563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.21004119515419006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12126734107732773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11512405425310135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13601745665073395, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1252126693725586, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12256809324026108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10725187510251999, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.1020316556096077, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06955885887145996, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.060058947652578354, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05827254056930542, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05785110965371132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03482390195131302, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.030147185549139977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02971571497619152, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02731725201010704, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.027047380805015564, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018471883609890938, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01847410760819912, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.017857711762189865, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012509767897427082, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1252126693725586, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1252126693725586, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2443639039993286, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2233692854642868, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.21551845967769623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.19241715967655182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11327897757291794, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10542155802249908, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13118061423301697, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11961083859205246, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1155664250254631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09889214485883713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0934690535068512, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06665054708719254, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05724465847015381, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05439720302820206, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05372120440006256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.033391620963811874, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028334256261587143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027989324182271957, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025489958003163338, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.025055386126041412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017824161797761917, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01784057915210724, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016871927306056023, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012307021766901016, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11961083859205246, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11961083859205246, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08920057862997055, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0793343111872673, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07345598936080933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.06414588540792465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04132511839270592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.03648855909705162, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.052041035145521164, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04744469374418259, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04225873574614525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03453062102198601, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.032845109701156616, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.026429593563079834, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.022655921056866646, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.01993698999285698, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.019258324056863785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013228898867964745, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.010453769005835056, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.009943663142621517, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009122973307967186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.008662857115268707, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006947566755115986, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006962739396840334, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.005985897034406662, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004707671236246824, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08920057862997055, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08920057862997055, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08320367336273193, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07312746345996857, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06731121242046356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.05862545967102051, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03816734254360199, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.033317696303129196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.048680149018764496, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.044114239513874054, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03916388377547264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.031645938754081726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.030072201043367386, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.024659279733896255, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02108568325638771, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.01838432252407074, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.017690764740109444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012319332920014858, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009529590606689453, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009019807912409306, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008217597380280495, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.007750087883323431, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006375654600560665, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006250554230064154, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005395050160586834, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004020801745355129, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08320367336273193, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08320367336273193, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2025793492794037, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1811896115541458, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1729697734117508, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1507185995578766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09469293802976608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08644796907901764, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11150183528661728, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10098341107368469, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09655262529850006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07883094251155853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07385789602994919, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.056801822036504745, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.048225969076156616, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04540238156914711, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.044739980250597, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028361473232507706, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.023250266909599304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.022611042484641075, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020002633333206177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.019541455432772636, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014646236784756184, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014160948805510998, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013450135476887226, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008963325060904026, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11150183528661728, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11150183528661728, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.21150800585746765, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19227056205272675, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18398182094097137, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16178663074970245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09841262549161911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09045474231243134, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11660637706518173, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10596978664398193, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10018539428710938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08411148935556412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07945621758699417, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05990849435329437, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05101601779460907, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04758840799331665, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0467565655708313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030079396441578865, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.0252153929322958, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.024537263438105583, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02243953011929989, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.021910324692726135, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01626213826239109, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0166005976498127, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015151155181229115, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012003238312900066, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11660637706518173, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11660637706518173, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17713268101215363, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16447119414806366, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15965653955936432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.14356930553913116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08378832042217255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07899821549654007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09513672441244125, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08742142468690872, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08482956886291504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07368336617946625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07005906105041504, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.048683445900678635, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04199102520942688, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04035481810569763, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03997085243463516, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0244111530482769, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02112528681755066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020771948620676994, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019130533561110497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01888619363307953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013089989311993122, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013365893624722958, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012528372928500175, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00944557972252369, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09513672441244125, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09513672441244125, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.24546952545642853, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22806280851364136, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22183160483837128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19941136240959167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11621823161840439, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10976701229810715, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13118550181388855, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12067554146051407, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11759070307016373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10213823616504669, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.097016841173172, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06704984605312347, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05783775448799133, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05581916868686676, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05534171313047409, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0335349515080452, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.028847094625234604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.028385421261191368, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026013752445578575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02570856735110283, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017682194709777832, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017696384340524673, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01697733998298645, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0119168097153306, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12067554146051407, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12067554146051407, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.25358685851097107, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.23278319835662842, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.22504565119743347, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.2011585235595703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11812411993741989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.11035940796136856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1367778331041336, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12412656098604202, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12035886198282242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10346353054046631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09812790900468826, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06983760744333267, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05956641584634781, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05685211718082428, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05620310828089714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.035042911767959595, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.029780477285385132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02945554070174694, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02691253274679184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.026497265323996544, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01884983293712139, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01896023564040661, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017919737845659256, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013394220732152462, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12412656098604202, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12412656098604202, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10285772383213043, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09124274551868439, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08452541381120682, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0737830102443695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04787652939558029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04220954701304436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06025421619415283, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05469224229454994, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04894154891371727, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.039835311472415924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.037903279066085815, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.030657071620225906, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.026232842355966568, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02317972108721733, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02241741679608822, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015350244008004665, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012290628626942635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011731956154108047, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010739394463598728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010247030295431614, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008142097853124142, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008329594507813454, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007062025833874941, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005876459181308746, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10285772383213043, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10285772383213043, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0899067297577858, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07930999249219894, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0720701739192009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06281855702400208, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.041372619569301605, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03576892986893654, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05390315130352974, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0489795058965683, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04239913821220398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03431380167603493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03286273404955864, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027328116819262505, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023388870060443878, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.019975652918219566, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01911880075931549, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013651560060679913, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010493829846382141, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.00988046545535326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009134449064731598, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008553042076528072, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0071793231181800365, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007149141281843185, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005998819600790739, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004810846410691738, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0899067297577858, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0899067297577858, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21917852759361267, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1942073106765747, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18451747298240662, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1599118709564209, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10218728333711624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09226524084806442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12210387736558914, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10961615294218063, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10425850749015808, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08403972536325455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07857472449541092, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.062165942043066025, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05244912579655647, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04898291453719139, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.048125963658094406, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.031165387481451035, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02508155070245266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.024309203028678894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021399930119514465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02082587033510208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016089962795376778, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015375298447906971, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.014518040232360363, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009728820994496346, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12210387736558914, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12210387736558914, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2202908992767334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1978607475757599, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18328027427196503, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16133293509483337, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10306545346975327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09134532511234283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13337673246860504, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11962059140205383, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10572395473718643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08752719312906265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08446916192770004, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06918486952781677, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.057929445058107376, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05040372163057327, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04847461357712746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03474690765142441, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.027326058596372604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02608916349709034, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02440834976732731, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023216158151626587, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01893901452422142, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019360002130270004, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016402093693614006, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014370587654411793, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11962059140205383, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11962059140205383, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17837871611118317, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16509489715099335, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15996801853179932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.14382994174957275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08437933027744293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07927463203668594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09656981378793716, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08846636116504669, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08551353961229324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07404907792806625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0705244392156601, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04948963597416878, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.042612217366695404, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04076545685529709, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.040323905646800995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02485613524913788, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02151026576757431, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.021125975996255875, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019481269642710686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.019209183752536774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013443022035062313, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013862800784409046, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012811352498829365, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010035322979092598, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09656981378793716, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09656981378793716, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.24693097174167633, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22873349487781525, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22207340598106384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19971662759780884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11667428910732269, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10991673171520233, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13248613476753235, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12158173322677612, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11819878220558167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10240892320871353, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0973004475235939, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06746349483728409, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05826190859079361, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.056042153388261795, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.055508095771074295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03377426788210869, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02891986444592476, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.028421150520443916, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026025649160146713, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025681685656309128, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017702285200357437, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01769588701426983, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016898609697818756, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011797474697232246, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12158173322677612, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12158173322677612, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2551116645336151, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.23389577865600586, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.22602716088294983, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.20209763944149017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11891447752714157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.11098436266183853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13766515254974365, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12509450316429138, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12123080343008041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10412819683551788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09870772063732147, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0703689455986023, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.06009075418114662, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.057258978486061096, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05658019334077835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03532937914133072, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.03000655211508274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.029669761657714844, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02710946835577488, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.026679635047912598, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01890856772661209, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.019119931384921074, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017917675897479057, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0135122025385499, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12509450316429138, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12509450316429138, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10781148821115494, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09541049599647522, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08702586591243744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0761466920375824, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.050107527524232864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04345338046550751, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06618758291006088, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05898425355553627, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05133679136633873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04179010167717934, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04035995155572891, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.033844854682683945, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028425224125385284, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.024364566430449486, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.023328937590122223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017074711620807648, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013095450587570667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012374875135719776, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01151891890913248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01084829494357109, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009183069691061974, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00920042209327221, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007676415145397186, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006613502744585276, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10781148821115494, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10781148821115494, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09677444398403168, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08490440249443054, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07696551084518433, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06710393726825714, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.044533975422382355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03819635137915611, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05869656056165695, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.052965082228183746, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.045756854116916656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03687567263841629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03540101274847984, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0299305971711874, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02538481168448925, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.021586094051599503, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.020595494657754898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015019012615084648, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.011424400843679905, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.010743286460638046, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009949393570423126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009306677617132664, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007905388250946999, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007894709706306458, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006535505875945091, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005429749842733145, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09677444398403168, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09677444398403168, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21465344727039337, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19184985756874084, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18228761851787567, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15934810042381287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10065817832946777, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09130241721868515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12042965739965439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10857494920492172, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10268262773752213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0838632807135582, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07909365743398666, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.061485424637794495, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05194889381527901, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04837071895599365, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.047484684735536575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.030772030353546143, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02476675808429718, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.023985901847481728, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021311834454536438, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.020725850015878677, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01590137556195259, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015215719118714333, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.014409152790904045, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009592755697667599, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12042965739965439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12042965739965439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.23206232488155365, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2070472538471222, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.197238028049469, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1738007515668869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10826286673545837, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09842360764741898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12828359007835388, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11605138331651688, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11060306429862976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09115712344646454, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08586237579584122, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06610284745693207, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.056102488189935684, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05252723768353462, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.051665183156728745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.033230409026145935, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02799265645444393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.027250800281763077, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02466878667473793, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.024132071062922478, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018038997426629066, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01857743225991726, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01690182462334633, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013694229535758495, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11605138331651688, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11605138331651688, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.18245583772659302, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16870318353176117, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16353143751621246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1469661146402359, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08644846826791763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08123719692230225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09873858839273453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09036405384540558, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08760175108909607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07567017525434494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07215787470340729, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0506083108484745, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0436260886490345, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.041883260011672974, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04146594554185867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02549021877348423, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.022301794961094856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.021935444325208664, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020226014778017998, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.019978227093815804, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013985944911837578, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014618982560932636, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01341920718550682, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010885224677622318, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09873858839273453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09873858839273453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.25458046793937683, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23577363789081573, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22913570702075958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.20571553707122803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12060796469449997, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11364918947219849, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13622495532035828, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12515564262866974, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12215341627597809, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10553775727748871, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.1001683846116066, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06959668546915054, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06003989279270172, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05797896161675453, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.057488903403282166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03479481860995293, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.030003029853105545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.029513398185372353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026960602030158043, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.026648882776498795, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01831785961985588, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01845262385904789, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.017607307061553, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012500091455876827, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12515564262866974, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12515564262866974, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.25737035274505615, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.23469214141368866, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.22542499005794525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.2011108249425888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11960912495851517, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.11065136641263962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.14058810472488403, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1276397407054901, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12216520309448242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10414072871208191, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09877824783325195, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0716567188501358, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.06130412966012955, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05767509341239929, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05680854618549347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.036068473011255264, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.03043377213180065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.029984528198838234, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.027440961450338364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.026876026764512062, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.019463885575532913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01978498324751854, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01822163723409176, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.014169672504067421, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12216520309448242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12216520309448242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10880056023597717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09747272729873657, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09139151871204376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08002860099077225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05080259218811989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04555204510688782, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06240563839673996, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05673246085643768, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.051781393587589264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0425933375954628, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.040439821779727936, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.031746555119752884, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02712680771946907, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02446916699409485, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.023815644904971123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0158870667219162, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012763550505042076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01226008590310812, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01113149058073759, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010701285675168037, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008325127884745598, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008297305554151535, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007341288961470127, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0055895536206662655, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10880056023597717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10880056023597717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09357883036136627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0837414413690567, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07752278447151184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06790225952863693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.043444085866212845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03840366750955582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05497540906071663, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04998961091041565, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04428353160619736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03652677685022354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03478151187300682, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027858184650540352, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023890484124422073, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.020901018753647804, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.020144667476415634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013924963772296906, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010876991786062717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.010340162552893162, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00951390340924263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009022320620715618, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007261928636580706, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007170972879976034, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006224616430699825, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0047261882573366165, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09357883036136627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09357883036136627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.23075927793979645, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20670469105243683, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19745628535747528, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.17242678999900818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10807368159294128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09867288172245026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1272294968366623, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11473201960325241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11000758409500122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08998110145330429, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08437170833349228, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06492242217063904, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.054793763905763626, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05178343877196312, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05103588104248047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03241587430238724, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02638825587928295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.025678079575300217, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02264833264052868, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02214597351849079, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016656121239066124, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015848737210035324, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01524857897311449, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009790736250579357, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11473201960325241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11473201960325241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2454681694507599, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2259545922279358, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.21766303479671478, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.19486340880393982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11483027786016464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10708268731832504, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13417288661003113, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12230685353279114, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11666110157966614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.10006684064865112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.09621382504701614, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06890834867954254, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05895918980240822, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0555872842669487, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05476915463805199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03458046540617943, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.029424026608467102, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.028788892552256584, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.026547983288764954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.026042258366942406, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018597157672047615, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019279485568404198, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01749456487596035, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014028025791049004, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12230685353279114, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12230685353279114, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.18546491861343384, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17149671912193298, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1660485714673996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1493319869041443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08795083314180374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0825260654091835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10057108849287033, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09225422143936157, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08911770582199097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07702168077230453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07338887453079224, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05165216326713562, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04448700323700905, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.042525649070739746, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.042067594826221466, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025889432057738304, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02249891310930252, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.022086558863520622, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020379826426506042, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02008482813835144, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013962466269731522, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014596786350011826, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.013292357325553894, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010646659880876541, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10057108849287033, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10057108849287033, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.25859856605529785, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2396133542060852, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2327704131603241, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.20923346281051636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12260916829109192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11553091555833817, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13898144662380219, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12751983106136322, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12415254861116409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10741405189037323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10205953568220139, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07108238339424133, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06117122992873192, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05895950272679329, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.058438315987586975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0356096476316452, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03049924038350582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.029987577348947525, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027425462380051613, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02708665281534195, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018909847363829613, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01875479519367218, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.018128812313079834, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01265701837837696, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12415254861116409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12260916829109192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2657734155654907, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.24137015640735626, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.23215171694755554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.20657330751419067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.12356013804674149, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.1142532229423523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.14411026239395142, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.13092990219593048, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1263326108455658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10692296922206879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.10135582834482193, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.07365624606609344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.06284921616315842, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05947626754641533, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05865537375211716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03702618554234505, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.03111092373728752, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.03071165643632412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.027842842042446136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.02732539176940918, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01986738294363022, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01983528584241867, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.018716532737016678, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01388238649815321, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.12356013804674149, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.12356013804674149, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11244747042655945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.101014144718647, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09356185048818588, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08240140229463577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05248011276125908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04654640331864357, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06693649291992188, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.060489390045404434, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05358221009373665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04443851485848427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.042622294276952744, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03402495011687279, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02897696942090988, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02540304698050022, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02449570596218109, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017060209065675735, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013412903994321823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012770554982125759, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011834566481411457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011248262599110603, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009014365263283253, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009063657373189926, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007708766497671604, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006273930426687002, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11244747042655945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11244747042655945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09498320519924164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08571556955575943, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07731755077838898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06814952194690704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04399921000003815, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03799578547477722, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05944197624921799, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05391433835029602, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.044944774359464645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.037686094641685486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03639186546206474, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.030057314783334732, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.025795506313443184, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.021280569955706596, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02011917158961296, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015000916086137295, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.011214700527489185, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.010463022626936436, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009973225183784962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009227451868355274, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007891587913036346, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007805071771144867, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006397206336259842, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005215629003942013, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09498320519924164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09498320519924164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.24227532744407654, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21790073812007904, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20763075351715088, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1819286048412323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11380204558372498, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10376973450183868, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13676470518112183, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12213342636823654, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11578913033008575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09540177881717682, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08995678275823593, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07010681927204132, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05848240852355957, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05467997491359711, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05373571813106537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03505731746554375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.028118418529629707, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.027262605726718903, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024390073493123055, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.023751400411128998, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018111562356352806, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017368627712130547, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.016266729682683945, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011196890845894814, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12213342636823654, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12213342636823654, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.261290967464447, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2278418242931366, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.2148488461971283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.18754471838474274, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.12127282470464706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10914026945829391, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.14552046358585358, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.13171793520450592, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12475818395614624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0989532619714737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.09455111622810364, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0748763307929039, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.06365205347537994, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.058792050927877426, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05761782079935074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03756561875343323, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.03131389245390892, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.030410757288336754, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.027041710913181305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02628317102789879, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.02021128125488758, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.020928556099534035, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.018680909648537636, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.015325627289712429, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12475818395614624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12475818395614624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17374803125858307, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16081289947032928, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15518657863140106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13990391790866852, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08246251195669174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07715672999620438, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09538596123456955, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08741670846939087, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08362577855587006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07248255610466003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06913058459758759, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.048995859920978546, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04221994802355766, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03999060019850731, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03945918381214142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024604514241218567, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.021319737657904625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020894072949886322, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01939733326435089, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01906997337937355, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013365086168050766, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014103779569268227, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012606380507349968, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010470700450241566, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09538596123456955, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09538596123456955, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.25289130210876465, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23489002883434296, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22790168225765228, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2053568810224533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12009107321500778, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11313286423683167, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13692933320999146, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.125595822930336, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12160871922969818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10562542080879211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10058823227882385, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0701879933476448, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06034339219331741, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05786658450961113, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0572761707007885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.035181038081645966, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03017798811197281, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.0296501275151968, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027298448607325554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.026923831552267075, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018842464312911034, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018954601138830185, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01798684149980545, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013189036399126053, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12160871922969818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12160871922969818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2543904781341553, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.23027418553829193, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.22073189914226532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.19639018177986145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11811012774705887, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10867361724376678, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1397353708744049, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12620915472507477, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12094319611787796, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10210134088993073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09695442765951157, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.07153241336345673, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.06090997904539108, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.057252489030361176, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05624070391058922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0364002026617527, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.03049934282898903, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.03001241758465767, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.027441401034593582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.026824811473488808, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01998566836118698, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.020289437845349312, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.018578248098492622, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.014919395558536053, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12094319611787796, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.12094319611787796, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12516501545906067, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11335331201553345, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10561415553092957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09347259998321533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.058661069720983505, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.05243966728448868, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07453195750713348, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06704629212617874, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05971459671854973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05012886971235275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04817470163106918, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.038076601922512054, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03221955895423889, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.028467737138271332, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.027524517849087715, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01919477991759777, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.015185927972197533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01452377624809742, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013542071916162968, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012954514473676682, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010235179215669632, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01038635428994894, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008819092065095901, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.007459206506609917, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12516501545906067, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12516501545906067, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09960342943668365, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09015123546123505, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08082135021686554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07171349972486496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04610616713762283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.039559703320264816, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06364739686250687, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05724601075053215, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.047045156359672546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03979193791747093, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0388203039765358, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03227020055055618, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.027409173548221588, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02230418100953102, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02095329575240612, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016166403889656067, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.011793342418968678, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.01090452540665865, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010564256459474564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009683367796242237, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008461172692477703, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008318775333464146, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006682709325104952, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005526187364012003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09960342943668365, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09960342943668365, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2604663074016571, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.23778486251831055, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.22806647419929504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.2032300978899002, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12326925247907639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.11374061554670334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14601826667785645, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1317264288663864, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12537117302417755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10605772584676743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.1009056344628334, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0746852234005928, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06320515275001526, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05920853838324547, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.0582599863409996, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03733178600668907, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.030310625210404396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.029493551701307297, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.026743702590465546, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.026115477085113525, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019254537299275398, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018472151830792427, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.017618300393223763, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011625447310507298, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12326925247907639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12326925247907639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.23718227446079254, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21585111320018768, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20768898725509644, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17852100729942322, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11073648184537888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10283086448907852, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1290261149406433, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11711878329515457, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11238736659288406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09338202327489853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08590003103017807, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06625992059707642, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05621767416596413, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05332684889435768, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05263378471136093, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03317931666970253, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.027861975133419037, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02724839746952057, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02441452071070671, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023959508165717125, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017695415765047073, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01768719218671322, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016739679500460625, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012332113459706306, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11711878329515457, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11711878329515457, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1698266863822937, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15755702555179596, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1523095816373825, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13742566108703613, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08053828030824661, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07560032606124878, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09278882294893265, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08509861677885056, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08165853470563889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07104398310184479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06776487082242966, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.047596901655197144, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04111733287572861, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0390700101852417, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03858237713575363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023935092613101006, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020861638709902763, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02046559751033783, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019036613404750824, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01873897574841976, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013056674040853977, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013814888894557953, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012372969649732113, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010305187664926052, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09278882294893265, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09278882294893265, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2561105489730835, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23864176869392395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23204603791236877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.20931921899318695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12144507467746735, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11480573564767838, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13765543699264526, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12653793394565582, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12286365777254105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10729396343231201, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10212049633264542, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07023313641548157, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.060616765171289444, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05831516161561012, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05776650458574295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0351327508687973, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.030019965022802353, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02951829321682453, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027139194309711456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.0267916452139616, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018441936001181602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018267517909407616, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.017640378326177597, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012046695686876774, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12286365777254105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12286365777254105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.25481587648391724, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2305494248867035, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.22119629383087158, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.19712282717227936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11783746629953384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10849441587924957, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1382538378238678, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1256139874458313, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1206359714269638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10183310508728027, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09656190872192383, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.07032405585050583, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.06026884913444519, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.056751251220703125, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05589408054947853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.035355713218450546, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.029773136600852013, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02934221737086773, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.026637356728315353, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.026087984442710876, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.019062576815485954, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01914134994149208, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017880771309137344, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013477756641805172, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1206359714269638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1206359714269638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1232311800122261, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11230029910802841, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10573599487543106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09374481439590454, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05769505724310875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.052360907196998596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07107902318239212, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06458309292793274, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05867312476038933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.049584873020648956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04734845831990242, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.036186862736940384, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.030908390879631042, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02781212516129017, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02704741060733795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01814565621316433, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014528201892971992, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013968018814921379, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012909101322293282, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012417851947247982, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009530968964099884, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009476670064032078, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008385579101741314, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006412671413272619, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1232311800122261, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1232311800122261, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1021643877029419, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09319107979536057, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08483142405748367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.0755026713013649, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04724670946598053, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.04144043102860451, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06263677775859833, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05736339092254639, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04813337326049805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04108439385890961, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03964570537209511, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03163363039493561, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.027287287637591362, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.022819843143224716, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.021631743758916855, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015814922749996185, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.01187794841825962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.01109274197369814, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010653461329638958, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009872455149888992, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008254148997366428, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008075255900621414, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0067180609330534935, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005147991236299276, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1021643877029419, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1021643877029419, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.26642799377441406, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.24518342316150665, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.23655691742897034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.21169906854629517, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12599140405654907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.11743541061878204, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1478331834077835, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13332527875900269, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12787775695323944, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10950697958469391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.10425817966461182, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07566428929567337, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06378567963838577, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.06045675650238991, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05962466821074486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03779581934213638, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.030775893479585648, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.030081596225500107, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.027360837906599045, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.026827096939086914, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01942211203277111, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018433768302202225, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.017889175564050674, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011357733979821205, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10950697958469391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10950697958469391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.24591895937919617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2234286665916443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.21532376110553741, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.19182217121124268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11472095549106598, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10642039775848389, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13210223615169525, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1203712522983551, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11643420904874802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09796237200498581, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0935438722372055, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06791083514690399, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05823907628655434, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05553169921040535, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05489029362797737, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0343468151986599, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02950458973646164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02892889827489853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02625935524702072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.025860514491796494, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01891748234629631, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0193339791148901, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01807623729109764, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014273696579039097, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1203712522983551, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1203712522983551, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16072224080562592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14939111471176147, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14423370361328125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13021881878376007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.0761786475777626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07144738733768463, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08823011815547943, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08091139793395996, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07720403373241425, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0673733800649643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06437363475561142, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04522969201207161, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03904739394783974, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.036929305642843246, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.036423537880182266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022712845355272293, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01966719701886177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.0192631334066391, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017968393862247467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01765347830951214, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012320947833359241, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012996501289308071, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011603562161326408, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009612590074539185, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08823011815547943, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08823011815547943, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.24876254796981812, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23220419883728027, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22579938173294067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.20384672284126282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11792085319757462, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11155149340629578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13390807807445526, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12313494086265564, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11927538365125656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10449769347906113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09948030114173889, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06836054474115372, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05900098755955696, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.056641969829797745, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.056069884449243546, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03417851775884628, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.029187481850385666, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.028681742027401924, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02645268104970455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.026090387254953384, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017947988584637642, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017831556499004364, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.017129935324192047, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011806132271885872, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12313494086265564, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12313494086265564, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24637019634246826, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22152499854564667, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2113606482744217, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18817700445652008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11357750743627548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10364753007888794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13424202799797058, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12242395430803299, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11656267940998077, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09756498038768768, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09261482954025269, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0685248076915741, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.058761198073625565, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.054706115275621414, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05371002107858658, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03452933579683304, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028743579983711243, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.028253057971596718, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02561982162296772, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.024978384375572205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018634101375937462, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01864681765437126, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017252996563911438, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013128096237778664, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12242395430803299, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12242395430803299, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10330501943826675, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0940864086151123, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08732395619153976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07809202373027802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04818112403154373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04303882643580437, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06165100634098053, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.055975042283535004, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04913657158613205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04180396720767021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0403875969350338, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03138267621397972, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02678939327597618, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.023295624181628227, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.022401314228773117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015716714784502983, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012281276285648346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011677189730107784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011038757860660553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010463632643222809, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008251035585999489, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008314888924360275, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006974757649004459, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.00569233438000083, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10330501943826675, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10330501943826675, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09182490408420563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08319449424743652, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07504820823669434, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06722396612167358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04227054491639137, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03660473972558975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05739712715148926, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05216272175312042, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.043290525674819946, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03682137280702591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03579653427004814, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028967242687940598, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024806756526231766, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02045084349811077, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.019294872879981995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01450792234390974, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.01074965950101614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.01000368781387806, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009685120545327663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008940935134887695, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007578740827739239, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0074788532219827175, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006056244019418955, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004924975801259279, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09182490408420563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09182490408420563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2222195267677307, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19915619492530823, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18656650185585022, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.16756540536880493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10286359488964081, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09205996990203857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13030117750167847, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11617855727672577, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10563582181930542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08855282515287399, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0851757824420929, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0665605217218399, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05548480525612831, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04944267496466637, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04794101044535637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03325742483139038, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.025460265576839447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.024337749928236008, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022531742230057716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.021533384919166565, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017096035182476044, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016234420239925385, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.014494067057967186, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010224835947155952, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11617855727672577, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11617855727672577, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.24336016178131104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21669630706310272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20643378794193268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.18067136406898499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11332355439662933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10272985696792603, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13395489752292633, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12158384174108505, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11589071899652481, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09448479115962982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0888887569308281, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06890782713890076, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05872694402933121, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05497736483812332, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05409692972898483, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.034609951078891754, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.029262561351060867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.0284645427018404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02561277151107788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02503341995179653, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01873135194182396, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01941070333123207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.017521511763334274, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014268914237618446, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12158384174108505, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12158384174108505, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16479797661304474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15294677019119263, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14773811399936676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1335439383983612, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07800707221031189, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07318469882011414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09018518775701523, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08275650441646576, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07911783456802368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06900841742753983, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06595189869403839, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04616184160113335, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.039900824427604675, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.037758730351924896, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.037249963730573654, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023198846727609634, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02003658190369606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.019628770649433136, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01828734390437603, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.017973385751247406, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012569473125040531, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01313195563852787, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011849023401737213, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009614983573555946, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09018518775701523, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09018518775701523, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.25527364015579224, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23836565017700195, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23168684542179108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.20937976241111755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12108522653579712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11454547941684723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13737943768501282, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12636932730674744, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12253580242395401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10733918100595474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10254568606615067, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07022132724523544, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06068155914545059, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05828586220741272, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.057711854577064514, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.035227660089731216, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03033400885760784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.029826730489730835, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02759603224694729, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02723119780421257, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01875554770231247, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018936101347208023, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01793716661632061, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013072998262941837, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12253580242395401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12253580242395401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2587074935436249, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.23012113571166992, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2187681943178177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.19377528131008148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11917220801115036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10797131061553955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.14230290055274963, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12878894805908203, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1227157786488533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10126706957817078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09596779197454453, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0726490169763565, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.06194084510207176, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.057558849453926086, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.056476134806871414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.036612510681152344, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.03046778216958046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02994578517973423, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.027022195979952812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.026320800185203552, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.019936449825763702, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.020082464441657066, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.018430467694997787, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01444855984300375, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1227157786488533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1227157786488533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11946657299995422, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10938804596662521, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.102153480052948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.091187484562397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.055972978472709656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.050465937703847885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07124758511781693, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06439263373613358, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05702458322048187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04877711832523346, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04692091420292854, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03639093413949013, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.030987011268734932, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.027204737067222595, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.026257747784256935, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018302028998732567, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.01456167921423912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01392977125942707, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013180909678339958, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012586879543960094, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009787105023860931, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010055772960186005, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008416447788476944, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0072575220838189125, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11946657299995422, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11946657299995422, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10284949094057083, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09271004796028137, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08261075615882874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.0742330253124237, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.047299403697252274, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.040312282741069794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0653252899646759, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05963057652115822, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04853352531790733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04117533192038536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.040254075080156326, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03305675834417343, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02839021012187004, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.022884782403707504, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.021395368501544, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01654263399541378, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012053503654897213, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011106831021606922, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010856385342776775, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009916350245475769, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008640674874186516, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008508404716849327, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006763391196727753, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005581118632107973, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10284949094057083, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10284949094057083, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2420887053012848, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2178112119436264, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20475496351718903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1843818873167038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11311674118041992, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10163281112909317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14226318895816803, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12643249332904816, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11600884795188904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09742378443479538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0939515084028244, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07308630645275116, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06052180379629135, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05442123860120773, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.052909594029188156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03658558428287506, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.027981573715806007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.026819132268428802, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024742823094129562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.023737115785479546, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01885201595723629, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017688332125544548, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.016149310395121574, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011108817532658577, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11600884795188904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11600884795188904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.22421248257160187, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2017117291688919, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19338735938072205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16608379781246185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10364247858524323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09531350433826447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12080486863851547, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11007776111364365, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10521616041660309, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08636345714330673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0815473273396492, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06220386177301407, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.053294021636247635, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05039876326918602, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0497005470097065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0314355194568634, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.027165398001670837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.026569055393338203, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023922428488731384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023494580760598183, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01749064400792122, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01838478446006775, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01656939648091793, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0140538876876235, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12080486863851547, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12080486863851547, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1541430503129959, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14277589321136475, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1375986933708191, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1242302656173706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07286808639764786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06813220679759979, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08465174585580826, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0777924507856369, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07396532595157623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06434483826160431, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.061444241553545, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.043324343860149384, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03745385259389877, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.035251036286354065, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03471870347857475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021736526861786842, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018636805936694145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.018220268189907074, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01697121001780033, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.016643177717924118, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01170114241540432, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012166307307779789, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010956596583127975, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008792035281658173, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1242302656173706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1242302656173706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.24368169903755188, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22690512239933014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22017762064933777, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19894397258758545, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11539427191019058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10885728895664215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13173365592956543, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12101493030786514, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11683744192123413, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10220567882061005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0973314717411995, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06726275384426117, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05807746574282646, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.055524975061416626, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05492429435253143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.033718738704919815, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.028805486857891083, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.028278272598981857, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026155265048146248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025769595056772232, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017923196777701378, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01790212094783783, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.017041999846696854, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012169865891337395, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12101493030786514, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12101493030786514, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2494245171546936, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22150050103664398, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20938116312026978, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1858767569065094, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11453983932733536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10316120833158493, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.138885498046875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12563754618167877, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1182187870144844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0976749062538147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09284251183271408, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.07101381570100784, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0605631023645401, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.055486831814050674, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.054222214967012405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0359635092318058, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.029653549194335938, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02903873659670353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02644585259258747, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.025644339621067047, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.019799472764134407, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.020017344504594803, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01805066503584385, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.014678068459033966, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1182187870144844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1182187870144844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12304345518350601, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1128523126244545, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10535348206758499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09459385275840759, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05761947110295296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.05191197618842125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07358557730913162, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06659156829118729, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05871135741472244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05055009573698044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.048885755240917206, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03756263107061386, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03194253519177437, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.027894487604498863, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02687079831957817, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018845513463020325, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014772913418710232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.014098435640335083, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013392754830420017, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012750471010804176, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01001496147364378, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010038466192781925, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008558561094105244, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006996785756200552, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12304345518350601, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12304345518350601, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10176505148410797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09281821548938751, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08231120556592941, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07415156066417694, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.046976249665021896, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0399908572435379, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06584981083869934, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06012950465083122, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.048005301505327225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04135129600763321, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.040508802980184555, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03339407220482826, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.028588954359292984, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.022779133170843124, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02118549682199955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016736848279833794, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012021214701235294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011025524698197842, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010929066687822342, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009930144995450974, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008809410035610199, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008591358549892902, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006814891006797552, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005638160277158022, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10176505148410797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10176505148410797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.25582143664360046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.23202256858348846, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2203124314546585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.19836397469043732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11990363895893097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10923777520656586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1472034901380539, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13150948286056519, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12248103320598602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10389220714569092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09977877140045166, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0754152238368988, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06296467036008835, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05758711323142052, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05624222382903099, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03776346147060394, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02942987158894539, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02839050441980362, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.026122817769646645, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.025233348831534386, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01934177055954933, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018165068700909615, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.016857128590345383, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011165319010615349, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12248103320598602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12248103320598602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.22556892037391663, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20648230612277985, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1993180215358734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17569321393966675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10503019392490387, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09782244265079498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12196411937475204, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1102789118885994, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.1064373031258583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0898578092455864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08534309267997742, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0627291202545166, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.053562503308057785, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.051193781197071075, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.050634514540433884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03164689987897873, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.027763087302446365, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.027258407324552536, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02489587664604187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.024550441652536392, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017562171444296837, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01886007934808731, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016824908554553986, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014629416167736053, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12196411937475204, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12196411937475204, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15272589027881622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1416214108467102, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13666528463363647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12347988039255142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07221181690692902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06762083619832993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08356067538261414, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07680176943540573, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07325337827205658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06379413604736328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.060893360525369644, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.042717088013887405, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03694892302155495, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03488609939813614, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03439175710082054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02144245058298111, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018384547904133797, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01799132116138935, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016738303005695343, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.016431774944067, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011553005315363407, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011904105544090271, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01086733303964138, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008526002056896687, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12347988039255142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12347988039255142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2401459962129593, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22377829253673553, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21728570759296417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19644860923290253, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11376673728227615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10741619765758514, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1297747790813446, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11926651746034622, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11519000679254532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10089696198701859, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09623552858829498, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06630005687475204, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05725684389472008, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05480024591088295, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05421610176563263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03325353562831879, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.028573565185070038, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.028060633689165115, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025997232645750046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025632167235016823, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017866207286715508, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017946431413292885, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01703808829188347, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012452889233827591, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11926651746034622, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11926651746034622, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2389039844274521, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21177616715431213, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19962361454963684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17629900574684143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10976520925760269, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0983811542391777, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13345246016979218, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1209787130355835, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1130402609705925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09303272515535355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08821727335453033, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0682719275355339, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.058423515409231186, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05316315218806267, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05188237503170967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0346183255314827, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028446760028600693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027781352400779724, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025269929319620132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.02441558986902237, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.019208258017897606, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.019281944260001183, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017466718330979347, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.014091688208281994, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1209787130355835, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1209787130355835, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12338373064994812, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11286675930023193, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10388284921646118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09380428493022919, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05760258063673973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.05103650316596031, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07629083096981049, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06857746839523315, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.058809876441955566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.050628796219825745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04936043918132782, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03892035037279129, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03285233676433563, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02795393578708172, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.026687560603022575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01952655427157879, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014893129467964172, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.014083494432270527, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013551519252359867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012769402004778385, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010396947152912617, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01037566177546978, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.00860112626105547, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.007269109599292278, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12338373064994812, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12338373064994812, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10509147495031357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09485708922147751, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08219726383686066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07438095659017563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04826396331191063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03981168568134308, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07058927416801453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06389933079481125, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04958060383796692, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04239765927195549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.041869278997182846, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.035734761506319046, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03044816106557846, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02342868596315384, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.021481061354279518, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017872009426355362, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012447071261703968, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011223259381949902, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011308076791465282, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.010058082640171051, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009348041377961636, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009142600931227207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0068891276605427265, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0059707025066018105, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10509147495031357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10509147495031357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2546910047531128, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22770342230796814, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.21392998099327087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.19238916039466858, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11827657371759415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10592597723007202, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14822809398174286, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13213159143924713, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12137701362371445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10133375972509384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09760218858718872, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0761752501130104, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06322365999221802, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05682014673948288, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05524569749832153, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.038121506571769714, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.029172474518418312, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.027936922386288643, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02571123093366623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.024647265672683716, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01959027536213398, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018408186733722687, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.016661370173096657, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011512522585690022, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12137701362371445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12137701362371445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.24971364438533783, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21724402904510498, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20597738027572632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17903728783130646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11637271195650101, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10385827720165253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1356227844953537, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12346931546926498, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11879172176122665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09383932501077652, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08750636130571365, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06988652050495148, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05974091961979866, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.056578874588012695, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.055816084146499634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03519931435585022, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.030277585610747337, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.029485171660780907, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02596723847091198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.025479737669229507, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01929301954805851, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.02018764242529869, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01829930767416954, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.015135316178202629, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12346931546926498, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12346931546926498, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15882626175880432, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14796511828899384, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14334501326084137, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12953734397888184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07516906410455704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07083133608102798, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08584047853946686, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07918984442949295, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07612845301628113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06660044193267822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0634695440530777, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.043811555951833725, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03803316131234169, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.036228571087121964, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.035799361765384674, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02194156125187874, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018943343311548233, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01858407072722912, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01724405027925968, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01697615534067154, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011667595244944096, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012007955461740494, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011066467501223087, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008409272879362106, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08584047853946686, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08584047853946686, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.25127512216567993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2348257303237915, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2285742312669754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.20662590861320496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11900067329406738, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11280448734760284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1345273107290268, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12392760813236237, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12036679685115814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10567361116409302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10058816522359848, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06853073835372925, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05937473475933075, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05716107785701752, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05663783475756645, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.034368958324193954, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.029481464996933937, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.029004700481891632, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026774154976010323, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.026443196460604668, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018218496814370155, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018003756180405617, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.017473453655838966, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011977601796388626, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12392760813236237, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12392760813236237, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.23902136087417603, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21302834153175354, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20250190794467926, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17864938080310822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10989929735660553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09950244426727295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13028603792190552, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11883857846260071, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11292731761932373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0932011529803276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08786005526781082, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06652726233005524, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0570233054459095, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05290066450834274, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.051882803440093994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03354288265109062, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.027754805982112885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027264317497611046, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024494795128703117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023851260542869568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018164798617362976, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01799090765416622, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016782836988568306, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012607710435986519, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11883857846260071, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11883857846260071, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11891645938158035, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10773413628339767, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09902694076299667, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.089421845972538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05531284958124161, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04874762147665024, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0728314220905304, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0657644271850586, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.056699253618717194, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04834238439798355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04709907993674278, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03722355514764786, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.031511224806308746, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.026807250455021858, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.025594528764486313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018680719658732414, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014205873012542725, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013419896364212036, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01284522283822298, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012090278789401054, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009958134964108467, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009813046082854271, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008284859359264374, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006771611049771309, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11891645938158035, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11891645938158035, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10518507659435272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09449727833271027, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08275731652975082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07490246742963791, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04845498874783516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.04018738865852356, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06972188502550125, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06293157488107681, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.049871835857629776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04238371551036835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.041945680975914, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.035420745611190796, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.030178889632225037, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02348339557647705, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02165895327925682, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017694318667054176, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012430815026164055, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011280868202447891, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011249497532844543, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01007508672773838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009241164661943913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009016274474561214, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0068863509222865105, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005875444505363703, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10518507659435272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10518507659435272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.24231626093387604, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2143331617116928, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19903132319450378, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.17854197323322296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11157573014497757, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09816989302635193, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14357994496822357, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1278078407049179, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11498606950044632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0949823334813118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09181554615497589, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07394374907016754, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06110681593418121, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.053591471165418625, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05166153982281685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03708379343152046, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.027507850900292397, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0260876826941967, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02413441240787506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.022857259958982468, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019030187278985977, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01761688105762005, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01566976122558117, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010846378281712532, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11498606950044632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11498606950044632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2541157603263855, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.22575215995311737, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.2152416855096817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1833682805299759, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11776604503393173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10656619071960449, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1368015706539154, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12570558488368988, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12031510472297668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09676539897918701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08886351436376572, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0701838955283165, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.060438450425863266, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.056798167526721954, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05591650307178497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03522302210330963, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.029797283932566643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.029022447764873505, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0256559569388628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.025077875703573227, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018857181072235107, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01917864754796028, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.017652787268161774, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01352176908403635, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12031510472297668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12031510472297668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15754535794258118, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14713385701179504, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14286860823631287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12914486229419708, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07465161383152008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07055986672639847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08478254824876785, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07820858806371689, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07553122937679291, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06622974574565887, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06313282251358032, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04325411468744278, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03754488378763199, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0359465666115284, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03556612879037857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021671578288078308, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018740689381957054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01841225102543831, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01706213690340519, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.016823923215270042, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011548119597136974, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011777178384363651, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011020252481102943, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008181991055607796, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08478254824876785, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08478254824876785, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2605149745941162, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24385249614715576, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2377937138080597, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.21490541100502014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12349425256252289, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11738205701112747, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13894617557525635, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1280355006456375, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12489757686853409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10980521887540817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10455945134162903, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07094236463308334, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.061372675001621246, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0593261793255806, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05883694440126419, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03551211580634117, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.030590461567044258, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.030140096321702003, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027812305837869644, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.027503764256834984, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018818072974681854, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018635936081409454, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.0181287694722414, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012423204258084297, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12489757686853409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12349425256252289, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.23976166546344757, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2139480710029602, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20341569185256958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17961478233337402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10998164117336273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09969828277826309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13012556731700897, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11902964860200882, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11303813010454178, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09348291158676147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08803673833608627, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06602641940116882, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05697154998779297, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.052860334515571594, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05184075981378555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03320302069187164, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.027560528367757797, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02704613283276558, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024306394159793854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.02364613674581051, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017799409106373787, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01764279045164585, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01641891337931156, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012058887630701065, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11902964860200882, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11902964860200882, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11890941113233566, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.107430599629879, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09843677282333374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08893978595733643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.055398885160684586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0484662726521492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07290448993444443, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06612236797809601, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05674005672335625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04818524792790413, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04700862988829613, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03727717697620392, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.031697213649749756, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.026857176795601845, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.025589024648070335, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01873110607266426, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014225860126316547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013372174464166164, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01282214093953371, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012029068544507027, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009975547902286053, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009847638197243214, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008283291012048721, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006760925520211458, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11890941113233566, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11890941113233566, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.11296605318784714, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1014484241604805, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08887302875518799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.08035093545913696, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.05206137150526047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.043244775384664536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07454252988100052, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06752760708332062, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.053549088537693024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.045400962233543396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.04476288706064224, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03781438246369362, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03227179870009422, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02526717446744442, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.023345135152339935, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01893807202577591, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.013439659029245377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.012227555736899376, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01215307880192995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.010945675894618034, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009959808550775051, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009774421341717243, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007563189137727022, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006504565477371216, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.11296605318784714, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.11296605318784714, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.23734554648399353, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20884618163108826, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19245018064975739, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.17262546718120575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10905398428440094, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09499259293079376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1424548625946045, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1266496330499649, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11266808956861496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09241172671318054, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08956306427717209, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07294570654630661, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06045312061905861, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05243298038840294, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05036376416683197, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03646445646882057, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02697097696363926, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.025477590039372444, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023606833070516586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02223924919962883, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01870913989841938, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017485234886407852, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01525910571217537, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010833852924406528, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11266808956861496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11266808956861496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.24269865453243256, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21312840282917023, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.2019663006067276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17262618243694305, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11334703117609024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10197129845619202, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1331196278333664, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12135574221611023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.1155986338853836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09032215178012848, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08559846132993698, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06814230233430862, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05849003791809082, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05487094447016716, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05401510000228882, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0342254638671875, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.029092133045196533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02828555554151535, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024631142616271973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02405826933681965, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01853068731725216, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019125204533338547, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01737213134765625, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013908158987760544, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12135574221611023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12135574221611023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1709505319595337, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15965095162391663, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15509413182735443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.14025737345218658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08100998401641846, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07660003751516342, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09188594669103622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08479881286621094, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08195813000202179, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07188496738672256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06846211105585098, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.046890996396541595, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04065524414181709, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03896016255021095, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03855038806796074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023457372561097145, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02020936645567417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.019856302067637444, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018369318917393684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018114997074007988, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012434790842235088, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012546038255095482, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011874951422214508, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00854587648063898, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09188594669103622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09188594669103622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.26650571823120117, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24934953451156616, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24316982924938202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.21975235641002655, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.1263219714164734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12000663578510284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14192064106464386, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1309383362531662, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.1277070939540863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11227698624134064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.1068437471985817, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07241082936525345, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06269770860671997, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.06062035635113716, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06012002378702164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03618890792131424, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.031135236844420433, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.030665427446365356, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.028256099671125412, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02793850004673004, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.019007768481969833, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018782922998070717, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01831144280731678, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01227522175759077, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11227698624134064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11227698624134064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2469262033700943, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22033938765525818, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2096734642982483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18544438481330872, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11333466321229935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10289259254932404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1341126561164856, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12247760593891144, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1165349930524826, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09643656760454178, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0909842774271965, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06809984147548676, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.058598972856998444, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.054483551532030106, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.053477488458156586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0342550054192543, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028409864753484726, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02791152335703373, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025070417672395706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.024414295330643654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018467893823981285, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018162131309509277, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017101900652050972, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012442109175026417, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12247760593891144, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12247760593891144, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12010418623685837, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10905744135379791, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10050297528505325, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09074976295232773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05594700574874878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.049472078680992126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07327970862388611, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06611307710409164, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05726556107401848, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04882284626364708, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.047521959990262985, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03729717805981636, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03162788227200508, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.027052491903305054, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02586725354194641, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01868531107902527, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014234837144613266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013460258953273296, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012835567817091942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01209303643554449, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009902911260724068, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00968864280730486, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008252715691924095, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006540847942233086, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12010418623685837, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12010418623685837, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10935334116220474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09788892418146133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08715132623910904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07867952436208725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.050236303359270096, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.04257398843765259, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.069574274122715, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06335539370775223, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.05175679177045822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0436670146882534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.042819466441869736, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.035251785069704056, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.030187170952558517, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.024365579709410667, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02279205434024334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01765168085694313, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012865977361798286, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011857273057103157, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01158379577100277, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.010585102252662182, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009232612326741219, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009119573049247265, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007207510061562061, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00603135023266077, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10935334116220474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10935334116220474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.25109511613845825, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22515226900577545, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2117481231689453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1903529316186905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11665184050798416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10465700924396515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1444191038608551, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13034917414188385, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11960417032241821, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10002197325229645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09607622772455215, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07377870380878448, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06223096325993538, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05599577724933624, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05442649498581886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.036920372396707535, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.028700802475214005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02751392498612404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025333192199468613, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.024299299344420433, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018953336402773857, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018062405288219452, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.016331104561686516, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011190004646778107, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11960417032241821, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11960417032241821, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.23091313242912292, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20458556711673737, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19509002566337585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16651761531829834, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.1068635806441307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09664107114076614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1240215077996254, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11378627270460129, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.1091284304857254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08736944198608398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08022775501012802, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06338195502758026, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.054642241448163986, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.051542978733778, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05077914893627167, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031726207584142685, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02705102041363716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.026281576603651047, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023233912885189056, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.022731788456439972, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016953183338046074, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01740274205803871, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015938052907586098, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012295713648200035, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1240215077996254, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1240215077996254, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17415602505207062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16262845695018768, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15804609656333923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1427515745162964, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08258489519357681, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07807465642690659, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09368924051523209, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08631937205791473, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08352459967136383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07319780439138412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0697585940361023, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04779616743326187, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04144008457660675, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03974635526537895, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03934590518474579, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023998307064175606, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020685385912656784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020333334803581238, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0188097283244133, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018556466326117516, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012897495180368423, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01292941253632307, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012357323430478573, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008921779692173004, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09368924051523209, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09368924051523209, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2702719569206238, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.25273922085762024, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24639904499053955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2226281613111496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.1282067894935608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12177937477827072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14403323829174042, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1329016089439392, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12962083518505096, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11382856965065002, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10829214006662369, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07350458949804306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06367412954568863, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0615699477493763, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06107236072421074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03678946569561958, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.0316438265144825, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.031174609437584877, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02870832569897175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02838711440563202, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01942680962383747, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01914232410490513, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.018722204491496086, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012570738792419434, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11382856965065002, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11382856965065002, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24906180799007416, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22043317556381226, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2088652104139328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18490831553936005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11410438269376755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10265111923217773, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13508599996566772, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1238800585269928, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11748185753822327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09630218893289566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.09116606414318085, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0687652975320816, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.059252455830574036, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05478355661034584, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.053709350526332855, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03453900292515755, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02849193662405014, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0279430840164423, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024966582655906677, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.024257294833660126, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018447505310177803, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018156133592128754, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01695147342979908, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012272894382476807, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1238800585269928, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1238800585269928, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1284283697605133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11696340888738632, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1079450398683548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09756270796060562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05998571589589119, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0532894991338253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07765784859657288, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07075972855091095, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.06129373610019684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.052477434277534485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.05103752017021179, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03964327275753021, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0338938944041729, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02905413880944252, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02780860662460327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01984926126897335, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.015364796854555607, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.0145386578515172, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01389879360795021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.013107819482684135, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010464951395988464, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010530333034694195, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008762587793171406, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.007213233970105648, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11696340888738632, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11696340888738632, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1197969913482666, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10846561193466187, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.09636154770851135, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.08721235394477844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.05539162829518318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.046958331018686295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07728738337755203, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.07041677832603455, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.05683615431189537, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04855131730437279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.04772398993372917, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03919373080134392, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0336129255592823, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.026830419898033142, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02498123236000538, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.019636059179902077, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.014085086062550545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.012905736453831196, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.012731283903121948, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.011544914916157722, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.010257395915687084, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00996522419154644, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007922466844320297, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006388064473867416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1197969913482666, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1197969913482666, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.265960156917572, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.24082833528518677, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.22890380024909973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.20586292445659637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12433251738548279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.11327044665813446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.15055273473262787, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1361721158027649, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12715266644954681, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10748018324375153, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.1028788834810257, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0767284706234932, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0650816261768341, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05964229255914688, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.058303236961364746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.038325365632772446, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.03049354813992977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.029429607093334198, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.027032209560275078, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.026144543662667274, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01968463510274887, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018827946856617928, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01741093210875988, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011615365743637085, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12433251738548279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12433251738548279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2315051108598709, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20772269368171692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19754308462142944, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1693434715270996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10811343044042587, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09834615886211395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12758927047252655, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11675138771533966, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10977958887815475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08882549405097961, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08362652361392975, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06514415144920349, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05597488582134247, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05202620103955269, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05106253921985626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03260884806513786, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.027018513530492783, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.026192203164100647, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023146990686655045, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.022500233724713326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01726086251437664, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017128419131040573, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015898561105132103, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011543091386556625, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11675138771533966, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11675138771533966, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17043784260749817, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15906527638435364, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15454955399036407, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13947002589702606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08081589639186859, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07638958096504211, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09152830392122269, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08448067307472229, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08175364136695862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07157465815544128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06813251227140427, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04670894891023636, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04051799327135086, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03886463865637779, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03847604990005493, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02338939905166626, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020153166726231575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.019804203882813454, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01829075627028942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018045000731945038, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01241722609847784, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012491230852901936, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011878927238285542, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00849928893148899, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09152830392122269, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09152830392122269, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.27302882075309753, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.25537800788879395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24901814758777618, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2249131202697754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12957952916622162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12301906198263168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1457773596048355, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13424772024154663, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.1309690624475479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11494825035333633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10935771465301514, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07436439394950867, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06433999538421631, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.06223291531205177, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06173219159245491, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03726666048169136, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03201218694448471, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.031532831490039825, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029031086713075638, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.028712620958685875, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01978609524667263, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019379669800400734, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.019081881269812584, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012768073938786983, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11494825035333633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11494825035333633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24560998380184174, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21679270267486572, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20476366579532623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18162314593791962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11228350549936295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10063070803880692, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13409103453159332, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12280366569757462, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11585764586925507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09471429139375687, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08998130261898041, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06801094859838486, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.058662377297878265, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05390298739075661, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05273023992776871, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.034216638654470444, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.027996454387903214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027409309521317482, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024521755054593086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.0237402506172657, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018364811316132545, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017876354977488518, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01678765006363392, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01196546945720911, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12280366569757462, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12280366569757462, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1339111626148224, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.12227503955364227, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.11336597800254822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1024295911192894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.06273292750120163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.05595679208636284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.08055604994297028, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07335322350263596, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.06399167329072952, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05495137721300125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.053317561745643616, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04109593480825424, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.035161469131708145, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.030307698994874954, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.029071703553199768, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.020571118220686913, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.015881629660725594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.015056260861456394, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.014332680962979794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01355767622590065, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010780682787299156, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010680334642529488, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.00906801875680685, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.007143100257962942, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.12227503955364227, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.12227503955364227, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.12056465446949005, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1095118522644043, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.09790997207164764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.08858385682106018, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.055989820510149, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.04782414063811302, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07679164409637451, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0702652707695961, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.05731438845396042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04908578097820282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0479993037879467, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03891094774007797, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.033606674522161484, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02706194669008255, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.025304105132818222, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.019472338259220123, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.014170358888804913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.013045313768088818, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.012801479548215866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.011676897294819355, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.010146302171051502, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009913756512105465, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007974030449986458, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00634459313005209, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.12056465446949005, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.12056465446949005, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.26757150888442993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.24281519651412964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.23104660212993622, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.20788292586803436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12534309923648834, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.11425668746232986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1526271104812622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13715554773807526, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.127931609749794, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10860089212656021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.10395621508359909, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07784231752157211, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06558876484632492, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.06011393666267395, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05875241011381149, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03888321667909622, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.030697081238031387, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02962697111070156, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.027248069643974304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.026375671848654747, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019980760291218758, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018908143043518066, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.017646461725234985, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011648847721517086, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10860089212656021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10860089212656021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.24839875102043152, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21836256980895996, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20684567093849182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.18060420453548431, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11520113050937653, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.1036200299859047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13656353950500488, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12430134415626526, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11819196492433548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09424827247858047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08906327188014984, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06963306665420532, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.059640705585479736, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.055410224944353104, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05436498299241066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03499823808670044, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02878509648144245, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.027878297492861748, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024614907801151276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023910943418741226, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0186845064163208, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018275756388902664, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01719306781888008, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012335594743490219, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12430134415626526, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12430134415626526, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17302021384239197, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1613447517156601, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15673154592514038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1414542943239212, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08209379762411118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07756797224283218, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09304294735193253, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0858544260263443, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08306776732206345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07267934828996658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06912320852279663, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0475136823952198, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0412164106965065, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.039513833820819855, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03910992294549942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023823855444788933, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020524000748991966, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020168758928775787, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018624864518642426, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018371116369962692, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012700249440968037, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012771748006343842, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012146534398198128, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008742322213947773, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09304294735193253, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09304294735193253, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2746889293193817, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2568165957927704, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2504420876502991, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2260574847459793, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.1305181384086609, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12389983236789703, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1462143063545227, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13522350788116455, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.13193261623382568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11569355428218842, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.1098521277308464, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07453382760286331, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06474483013153076, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.06262296438217163, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06211752071976662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03722035884857178, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03209826350212097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.03160778433084488, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029048757627606392, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02872329205274582, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.019336508587002754, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01925824210047722, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.018612388521432877, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012441303580999374, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11569355428218842, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11569355428218842, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24751676619052887, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21762129664421082, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20485994219779968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18234921991825104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11295011639595032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10057929158210754, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1356799155473709, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12442703545093536, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11677124351263046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09529950469732285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0908544585108757, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06903859227895737, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05950118601322174, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05425381287932396, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05296077951788902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0345771349966526, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02821499854326248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027566984295845032, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02471998520195484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023872261866927147, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018338512629270554, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018133942037820816, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016568122431635857, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012160442769527435, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12442703545093536, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12442703545093536, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12660017609596252, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11506225913763046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10575305670499802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09547939896583557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.059180427342653275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.05214157700538635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07732713222503662, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07047030329704285, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.06050431728363037, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.051722362637519836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.05033179000020027, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03947016969323158, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0338287428021431, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0286446250975132, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.027294237166643143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01976642943918705, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.015091144479811192, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01421802956610918, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01362402830272913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012780421413481236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010329404845833778, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010352163575589657, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008492020890116692, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006993622053414583, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11506225913763046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11506225913763046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.11593452841043472, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10454726219177246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.09172668308019638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.08295208215713501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.053498078137636185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.044692978262901306, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07629727572202682, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06946846097707748, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.05498293787240982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.046834658831357956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.04610556364059448, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.038673389703035355, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03312386944890022, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.025962572544813156, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02399611845612526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01936299540102482, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0137258255854249, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.012482911348342896, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01242859847843647, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.011168260127305984, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01018987875431776, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009919132106006145, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007769003510475159, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006471241358667612, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.11593452841043472, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.11593452841043472, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2565612196922302, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22904929518699646, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2140415757894516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.19217123091220856, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11881300806999207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.1057596206665039, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.15053483843803406, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.134646475315094, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12213999032974243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10177817940711975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09816966205835342, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07722680270671844, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06426313519477844, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05708124116063118, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05525466054677963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03860647976398468, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.029293527826666832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.027949083596467972, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025812679901719093, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.024600619450211525, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01984334923326969, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018598409369587898, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.016721786931157112, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011492779478430748, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12213999032974243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12213999032974243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2550172507762909, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.22441092133522034, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.21383708715438843, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.18149305880069733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11921019852161407, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10817024856805801, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13737912476062775, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.126240074634552, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12154452502727509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09575880318880081, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08831536024808884, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.07020453363656998, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.06049034744501114, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.057268716394901276, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.056490443646907806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03507469967007637, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.029650483280420303, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02889605052769184, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024971183389425278, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.024422185495495796, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018459685146808624, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018472271040081978, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.017405280843377113, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01238588709384203, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12154452502727509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.12154452502727509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17601826786994934, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1641901582479477, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1594783514738083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.14386239647865295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08359747380018234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07895652949810028, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09474120289087296, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08746862411499023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08458799123764038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07394760847091675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07030890136957169, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04837187007069588, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.041965365409851074, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04023097828030586, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.039817068725824356, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024197442457079887, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02088066190481186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02051820605993271, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018939975649118423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018677672371268272, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012823442928493023, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01297861896455288, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012249491177499294, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008855949155986309, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09474120289087296, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09474120289087296, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.27313461899757385, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2553541660308838, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2489350587129593, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.22457635402679443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12986043095588684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.12319914996623993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14584441483020782, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13463927805423737, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.13126885890960693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11502470821142197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10926394909620285, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07442399859428406, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06451012194156647, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.06235543638467789, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.06184079125523567, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03720574826002121, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.03204106166958809, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.0315583236515522, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02901029959321022, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02867817133665085, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.019528711214661598, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019356893375515938, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.018800295889377594, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012665626592934132, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11502470821142197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11502470821142197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2455817312002182, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21483290195465088, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20135360956192017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17949147522449493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11197877675294876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09906832128763199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13502486050128937, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12415531277656555, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11599115282297134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09407813847064972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08989746123552322, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06878522038459778, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05946887284517288, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.0538836345076561, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05249306932091713, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03466065973043442, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02817777544260025, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027494946494698524, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02467183955013752, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023755906149744987, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018635835498571396, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01837329752743244, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01681329309940338, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012528765946626663, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12415531277656555, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12415531277656555, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1297946572303772, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11826083064079285, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1100652739405632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09930367022752762, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.060803450644016266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.05427302047610283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07722269743680954, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0703509971499443, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.062067966908216476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05307047814130783, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.051272232085466385, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.039477914571762085, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03375687450170517, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02939901128411293, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.028293650597333908, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01978394389152527, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.01546267420053482, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.014706761576235294, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01394673902541399, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.013243304565548897, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010426350869238377, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010408807545900345, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008882506750524044, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.007074651774019003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11826083064079285, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11826083064079285, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1160726472735405, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10585837066173553, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.09729646146297455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.08784414827823639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.054185640066862106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.047781724482774734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07076039165258408, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06448803097009659, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.05529752001166344, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04740341752767563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0459645576775074, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.035828083753585815, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03088361956179142, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.026174964383244514, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.02496342733502388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017988666892051697, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.013733656145632267, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.012919756583869457, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.012391602620482445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.011613293550908566, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009424610994756222, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009335409849882126, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007785949856042862, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006180228665471077, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1160726472735405, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1160726472735405, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.24055112898349762, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21830971539020538, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20740768313407898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.18651604652404785, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11256621778011322, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.1026391088962555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.135684534907341, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12362294644117355, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11490949988365173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09739307314157486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09335854649543762, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06916186958551407, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05904748663306236, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05398934707045555, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.052723754197359085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03450268507003784, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.027550838887691498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.026550741866230965, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024421345442533493, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.023594025522470474, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017716672271490097, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016974586993455887, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.015785332769155502, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.01036221906542778, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12362294644117355, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12362294644117355, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.22477011382579803, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20175546407699585, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19323930144309998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16588833928108215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.1056705042719841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0969306230545044, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12171387672424316, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11161446571350098, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.1074233427643776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08695247769355774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08146072924137115, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06263018399477005, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.053854916244745255, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05117826908826828, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05054156854748726, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031612150371074677, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02724592760205269, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.026643114164471626, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023646622896194458, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023258119821548462, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017354443669319153, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01793530397117138, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016505591571331024, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013305790722370148, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12171387672424316, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12171387672424316, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16962702572345734, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1579483598470688, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15319932997226715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13819147646427155, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08061529695987701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07599103450775146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09165389835834503, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08457060158252716, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08159022033214569, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07117286324501038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06773194670677185, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04688494652509689, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04062056913971901, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03883732482790947, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.038411278277635574, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023479584604501724, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020213082432746887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01984112150967121, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0183232594281435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018054306507110596, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012502267956733704, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.0126603152602911, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011924467980861664, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008731427602469921, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09165389835834503, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09165389835834503, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2606152594089508, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24344217777252197, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23713240027427673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.21390053629875183, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12398799508810043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.11749276518821716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13940690457820892, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12864616513252258, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.12535248696804047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10968805849552155, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.10423342883586884, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07123973965644836, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06168727949261665, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05955503508448601, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05904734134674072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03563106805086136, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.030664755031466484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.03018328733742237, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027746468782424927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.027422145009040833, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018781716004014015, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018620049580931664, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01805543713271618, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012310398742556572, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12398799508810043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.12398799508810043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.24371939897537231, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21173836290836334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1974175125360489, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17588946223258972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.11098986864089966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09737405180931091, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13601185381412506, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12397585064172745, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11518338322639465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0927014946937561, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08869676291942596, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06909371167421341, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05938197299838066, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.05347947031259537, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05200900882482529, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.034835245460271835, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028073949739336967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.027332080528140068, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02450128272175789, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023529110476374626, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018828270956873894, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018489399924874306, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016857333481311798, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012716895900666714, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12397585064172745, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12397585064172745, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12145863473415375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11052843928337097, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10043471306562424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09070000797510147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.056755561381578445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0493190735578537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07671211659908295, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06927986443042755, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05805177986621857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.049722254276275635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04868992790579796, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03922579810023308, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.033236172050237656, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.027533266693353653, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.026035156100988388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01965148374438286, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014592134393751621, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013624941930174828, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013219838961958885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012273709289729595, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010336660780012608, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01023181714117527, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008292225189507008, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006950048264116049, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12145863473415375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12145863473415375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10846240073442459, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09780565649271011, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08364949375391006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07587127387523651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04993735998868942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.04050520434975624, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07481209933757782, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06755460798740387, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.05139351263642311, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04391259327530861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.04362819716334343, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03771473467350006, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.032211869955062866, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02429899573326111, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.022055717185139656, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.018916694447398186, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012948164716362953, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.01156248152256012, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011789968237280846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.010372967459261417, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009835418313741684, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009665334597229958, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007086249999701977, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006311139091849327, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10846240073442459, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10846240073442459, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.25080522894859314, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22317615151405334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2074178159236908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.18630999326705933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11622391641139984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.102507084608078, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14916490018367767, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13336017727851868, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11961206048727036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09938772767782211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09604896605014801, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07654798030853271, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06377958506345749, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05592535436153412, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05392100289463997, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03844427689909935, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02888917177915573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.027446599677205086, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02550722099840641, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02420329861342907, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019802795723080635, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018748851493000984, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01639750227332115, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011969882994890213, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11961206048727036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11961206048727036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.23708805441856384, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20965391397476196, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1997527927160263, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17128705978393555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11042902618646622, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10024343430995941, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12813293933868408, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11723130941390991, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11246707290410995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08946791291236877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08372054249048233, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06565281003713608, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.056294139474630356, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05324745923280716, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05249492824077606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0328785739839077, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.027967015281319618, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.027277201414108276, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023857861757278442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023359328508377075, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017661601305007935, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017983291298151016, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016661208122968674, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012736961245536804, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11723130941390991, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11723130941390991, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15692868828773499, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14595374464988708, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14122790098190308, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12729701399803162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07451127469539642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06998255848884583, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08533081412315369, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07872274518013, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07548869401216507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06575832515954971, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06255915760993958, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04361288622021675, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03780703246593475, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.035910870879888535, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0354573056101799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021843474358320236, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018709134310483932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01833624765276909, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016955014318227768, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01666707918047905, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011623084545135498, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011788002215325832, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010997248813509941, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008132653310894966, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08533081412315369, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08533081412315369, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23790693283081055, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2217845618724823, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21557974815368652, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19442293047904968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.1131025180220604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10687880218029022, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12801072001457214, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11799116432666779, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.1144075095653534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09993095695972443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09495271742343903, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06545908004045486, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.056614309549331665, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.054402004927396774, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05387524887919426, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03279438614845276, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.028184285387396812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.027712298557162285, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025534773245453835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025199884548783302, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017439667135477066, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01742265559732914, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016695505008101463, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011819549836218357, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11799116432666779, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11799116432666779, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.22690948843955994, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19702903926372528, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1837066411972046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.16240765154361725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.1035439744591713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09075742214918137, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1261899769306183, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11543335020542145, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10731871426105499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08588243275880814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08172760903835297, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0644415020942688, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.055453915148973465, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04998244345188141, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.048619359731674194, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03249574080109596, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.0263933464884758, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.025714222341775894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022976547479629517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.02208835631608963, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017626041546463966, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017572099342942238, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.015835491940379143, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012358351610600948, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11543335020542145, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11543335020542145, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12086383998394012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1098482683300972, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10041236132383347, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09086494147777557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05633806064724922, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0492563433945179, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07641378790140152, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06792528182268143, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.057694435119628906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04936637729406357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.048332009464502335, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03903147205710411, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0325392447412014, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.027271926403045654, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.025899158790707588, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01958647556602955, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014384930953383446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013475913554430008, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013019919395446777, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012160556390881538, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010262280702590942, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009945289231836796, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008220355026423931, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006697235628962517, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12086383998394012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12086383998394012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10624007880687714, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09719563275575638, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08688359707593918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.0785350427031517, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.049360327422618866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.042271390557289124, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06928012520074844, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0625605508685112, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.05043511837720871, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.043619588017463684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.042838431894779205, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03525974601507187, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.029844198375940323, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.023924391716718674, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.022288277745246887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01765289157629013, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012586357071995735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.01157689280807972, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011463534086942673, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.010449836030602455, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009188964031636715, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008926782757043839, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007037597242742777, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005820656195282936, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10624007880687714, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10624007880687714, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.26695334911346436, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.24403411149978638, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2324105054140091, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.20959116518497467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.12553615868091583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.11505548655986786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.15242363512516022, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1376485377550125, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12813472747802734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10970377922058105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.1054045781493187, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0780363604426384, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06595226377248764, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.06033524125814438, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.058957573026418686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.039144761860370636, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.03092261403799057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02988026663661003, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.027698293328285217, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.026785245165228844, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.020120149478316307, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.019289636984467506, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.017691809684038162, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.012139428406953812, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10970377922058105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10970377922058105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20207442343235016, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17410165071487427, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16410605609416962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.14412608742713928, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09380824118852615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08242782205343246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11104734241962433, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09983530640602112, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09621007740497589, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07573463767766953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07100212574005127, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0570220947265625, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04798471927642822, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.045203667134046555, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.044529493898153305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02857092209160328, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02359413169324398, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.022842181846499443, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020016198977828026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.019561797380447388, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015164056792855263, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015034577809274197, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014183664694428444, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010431862436234951, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11104734241962433, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11104734241962433, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15470853447914124, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.143690288066864, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13887734711170197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12517574429512024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07341715693473816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06886659562587738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0842832550406456, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07768698036670685, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07438725978136063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06469970941543579, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06165211275219917, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04314796254038811, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03735879808664322, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03543420508503914, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03497306630015373, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02161787636578083, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018581867218017578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01819712109863758, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016856396570801735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.016565660014748573, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011573274619877338, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011881920509040356, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010933561250567436, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008395472541451454, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12517574429512024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12517574429512024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.20828302204608917, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19392536580562592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.18820618093013763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1698475182056427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.09964270144701004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.094008669257164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11306913197040558, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10433095693588257, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10088793188333511, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08812219649553299, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0839032530784607, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.058496974408626556, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05099369212985039, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04893920570611954, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04845690727233887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029491767287254333, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.027062304317951202, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026655888184905052, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024950701743364334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024664489552378654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016683610156178474, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018933845683932304, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016043953597545624, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.015263399109244347, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11306913197040558, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11306913197040558, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2279895842075348, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19971419870853424, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18802665174007416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.16597674787044525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10433769971132278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09285218268632889, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12570883333683014, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11437281221151352, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10763055831193924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08715927600860596, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08235909044742584, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06370457261800766, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05478557571768761, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.050271496176719666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.0491555891931057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03200182691216469, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.026419827714562416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.025861207395792007, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023077167570590973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.022342165932059288, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017126962542533875, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01727263256907463, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.015594209544360638, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01206816453486681, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11437281221151352, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11437281221151352, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11633272469043732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10532304644584656, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09458701312541962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08542822301387787, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05413692444562912, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04636075720191002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07519298791885376, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06738747656345367, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.055516209453344345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.047268252819776535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.046490028500556946, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03843101114034653, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03233065456151962, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.026311732828617096, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.024698439985513687, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.019292496144771576, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014016199856996536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012996827252209187, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012690906412899494, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01167815551161766, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010189254768192768, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010008402168750763, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007979560643434525, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006834996864199638, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11633272469043732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11633272469043732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10782302916049957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09727256745100021, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08290255814790726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07506619393825531, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0495280884206295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0400238074362278, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0750044584274292, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06747063994407654, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.050987567752599716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0435541495680809, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.043499063700437546, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03807716444134712, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03213566914200783, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.024135936051607132, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.021821636706590652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01913963444530964, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012856651097536087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011443743482232094, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011711257509887218, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.010261782445013523, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009962882846593857, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00964501965790987, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0070613473653793335, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006290350575000048, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10782302916049957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10782302916049957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.24779945611953735, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21932120621204376, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20225362479686737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.18198367953300476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11435634642839432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09997124969959259, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14944593608379364, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13334068655967712, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.1180448979139328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09776442497968674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09518804401159286, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07697474956512451, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06383183598518372, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05517478659749031, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.0529002770781517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0387706384062767, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02866191789507866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.027038177475333214, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025337282568216324, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.023840267211198807, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.02005777694284916, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01899076998233795, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.016276836395263672, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.012246988713741302, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.1180448979139328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.1180448979139328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20280657708644867, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17879493534564972, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1706671118736267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.14867213368415833, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09439761191606522, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08582767099142075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1092972606420517, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09935511648654938, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09612320363521576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07727725058794022, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07332424074411392, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05614902451634407, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04806327819824219, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.045845840126276016, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04531029984354973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028249545022845268, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.024568133056163788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02404877357184887, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021307870745658875, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020978596061468124, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015568292699754238, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016379104927182198, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014865197241306305, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012362580746412277, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1092972606420517, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1092972606420517, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15720723569393158, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14628241956233978, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14169348776340485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12783962488174438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07509022951126099, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07070449739694595, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08585131913423538, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07910092175006866, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07603728026151657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06639128923416138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06334393471479416, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.044405851513147354, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.038693711161613464, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.036911070346832275, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.036485396325588226, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022419529035687447, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020393142476677895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020059967413544655, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018793394789099693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01854782924056053, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012675803154706955, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01429296936839819, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012110485695302486, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011474247090518475, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08585131913423538, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08585131913423538, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16368691623210907, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15251444280147552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14805398881435394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13358061015605927, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07821198552846909, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07382196187973022, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08884679526090622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08183831721544266, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07915043830871582, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06916655600070953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06591301411390305, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04591628164052963, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.039902593940496445, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03827737644314766, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03789393976330757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02315882034599781, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020949816331267357, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020628154277801514, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01926077902317047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.019039170816540718, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013004228472709656, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014432459138333797, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012490968219935894, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011418337002396584, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08884679526090622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08884679526090622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.1924021989107132, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16884757578372955, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1590947061777115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1392904669046402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.0886109322309494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07909572124481201, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10797996819019318, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09625326842069626, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09101984649896622, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07341615855693817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.06962621212005615, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05483125150203705, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04682470113039017, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.043329689651727676, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04247864708304405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.028021328151226044, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02376459911465645, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02333918586373329, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020983118563890457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.020441772416234016, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016167744994163513, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01664559170603752, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.015114263631403446, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012981044128537178, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10797996819019318, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10797996819019318, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1122332364320755, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10146570950746536, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09132018685340881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08238885551691055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.052140433341264725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0447358600795269, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07180048525333405, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06470265239477158, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05350968986749649, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.045479729771614075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04470670968294144, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03668444603681564, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03102146089076996, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.025313924998044968, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02379668317735195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018387680873274803, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013472390361130238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012509288266301155, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012188446708023548, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011241921223700047, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009664706885814667, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009590334258973598, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007591956295073032, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0065482622012495995, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1122332364320755, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1122332364320755, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10584180057048798, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09536493569612503, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0821518748998642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07434085756540298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04872169718146324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03982243314385414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07317765802145004, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06518469750881195, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.050129618495702744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.042675502598285675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.04257480055093765, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03715771436691284, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.031090082600712776, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02375398576259613, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.021665522828698158, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.018642444163560867, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.012700282968580723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.011432159692049026, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011539974249899387, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.010246561840176582, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009784608148038387, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009461980313062668, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007123508490622044, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006320095621049404, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10584180057048798, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10584180057048798, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.25176048278808594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22031229734420776, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20147019624710083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.18135839700698853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.11589957773685455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09969988465309143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.15152214467525482, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13631701469421387, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12014875560998917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09796909987926483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09559004753828049, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07805535942316055, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06525769829750061, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.055835284292697906, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05339808762073517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03922862559556961, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02885194681584835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.027098339051008224, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025256440043449402, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02364250272512436, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.020225955173373222, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.019061654806137085, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01630510948598385, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011995866894721985, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12014875560998917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.12014875560998917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.12860853970050812, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11417870968580246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10974547266960144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09463819116353989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.0581493116915226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0530950129032135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0683680847287178, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06090198829770088, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05912065878510475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04755973070859909, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04556877911090851, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03478064760565758, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.031210171058773994, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.030016344040632248, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02976573444902897, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0184545386582613, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.018532924354076385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.018297452479600906, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01690269261598587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01673930138349533, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.011454921215772629, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014788318425416946, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.011122923344373703, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013270273804664612, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11417870968580246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11417870968580246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15583489835262299, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14565527439117432, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14159303903579712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12760065495967865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07402777671813965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0700405016541481, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08389274030923843, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07730814069509506, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07482965290546417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06550782173871994, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06228739395737648, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04284433275461197, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.037083420902490616, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.035608548671007156, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.035253800451755524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02143002673983574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.0184524804353714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01814092881977558, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016731206327676773, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01650657132267952, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011300995014607906, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011421299539506435, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010801287367939949, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007753858342766762, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08389274030923843, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08389274030923843, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.10543034225702286, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.09823538362979889, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.09542685747146606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.08598041534423828, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.049982618540525436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.047251325100660324, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.056745272129774094, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.05222563445568085, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.05056063085794449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.04417003318667412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.04202922806143761, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.02907412126660347, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.025142885744571686, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.024143336340785027, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.023905610665678978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.014608352445065975, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01273175235837698, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.012521632015705109, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.011587874963879585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01144102681428194, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.007935749366879463, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.008182563818991184, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.007608088664710522, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.005920649506151676, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.10543034225702286, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.10543034225702286, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.11237841844558716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.09809909760951996, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.09135634452104568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.07980567216873169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.051791224628686905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.04574398323893547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.06505373120307922, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.05802566558122635, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.053453125059604645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.04293401166796684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0411328487098217, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0336107537150383, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.028394998982548714, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.025644704699516296, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.024932866916060448, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.01739826425909996, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.01452014222741127, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0141941262409091, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.012977855280041695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.012565894983708858, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01032452005892992, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01074820663779974, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.009471503086388111, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.008736475370824337, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.11237841844558716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.11237841844558716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + } + ], + "last_module_idx": 66, + "base_perplexity": 7.933868071418541 +} \ No newline at end of file