{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.053829073905944824, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.05293033644556999, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.025039445608854294, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.02491147071123123, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.02482321485877037, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.011136921122670174, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.06642473489046097, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.052372246980667114, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.02490975335240364, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.024762000888586044, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.02502734400331974, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.028598949313163757, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02472260221838951, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.01476888544857502, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.011076855473220348, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01609085500240326, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.011056886985898018, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.009628737345337868, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.011052347719669342, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.009623687714338303, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.012311754748225212, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.011051605455577374, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.010891532525420189, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.009620326571166515, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.011136921122670174, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, "best_option": { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.011136921122670174, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.046816300600767136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.0462322011590004, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.02342386730015278, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.02333904802799225, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.023277655243873596, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.011587287299335003, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.0525086335837841, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.04585539922118187, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.023316005244851112, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.02323719672858715, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.02432628534734249, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.03025023266673088, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.023209568113088608, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.014074638485908508, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.011553943157196045, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01566249132156372, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.011543817818164825, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.0104021355509758, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.01154383085668087, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.010400699451565742, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.012866079807281494, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.011541630141437054, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.01179521344602108, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.010399634949862957, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.011587287299335003, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, "best_option": { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.011587287299335003, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.12623554468154907, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.1131783127784729, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.06799986958503723, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.062462154775857925, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.05900634080171585, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.02798113413155079, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.14311003684997559, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10630106925964355, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.062187280505895615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.05491376295685768, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.07286947220563889, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.0879056379199028, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05314257740974426, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.029531318694353104, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.019214937463402748, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.04549257084727287, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.014920918270945549, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.009823834523558617, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.014099689200520515, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.007988257333636284, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.023242203518748283, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.013737170957028866, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.00788735318928957, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.006798319984227419, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.014920918270945549, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.014920918270945549, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.013302475214004517, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.008441327139735222, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.004982148762792349, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.00539852911606431, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.005076173227280378, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.002732739085331559, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.008855077438056469, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.00789971835911274, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.006196079310029745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.004008839372545481, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.004214541055262089, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.0044324640184640884, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.003917671740055084, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.0027274973690509796, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.0023739738389849663, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.002329392358660698, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.001827241387218237, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.0017328476533293724, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.0016875634901225567, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.0015149954706430435, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.0014024450210854411, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.0016775686526671052, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.0010699708946049213, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0014523409772664309, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.013302475214004517, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.013302475214004517, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.14445719122886658, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.13350731134414673, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.12548698484897614, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.1118655726313591, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.0664844736456871, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.06145395338535309, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.08452418446540833, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.07650399208068848, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.06814761459827423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.060234423726797104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.055629439651966095, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.04245101660490036, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.03699352219700813, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.03273417055606842, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.03162741661071777, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.021729080006480217, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.01832720637321472, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.017803560942411423, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.017222624272108078, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.016246158629655838, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.012728175148367882, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.013512540608644485, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.011436620727181435, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.01067913044244051, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.01832720637321472, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.01832720637321472, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.18171736598014832, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.16819614171981812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.16132770478725433, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.14365291595458984, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.08362497389316559, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.07875405997037888, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.09944415837526321, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.0913340300321579, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.08564983308315277, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.075399249792099, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.06913411617279053, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05038468539714813, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.0435224324464798, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.040049489587545395, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.03920832276344299, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.02515293098986149, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.020643459632992744, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02027827501296997, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.018998591229319572, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.01799653097987175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013205970637500286, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.012943750247359276, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.011983090080320835, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.008389363065361977, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.01799653097987175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.01799653097987175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.11829442530870438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.08989354968070984, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.07846572995185852, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.06758496165275574, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.051266491413116455, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.03946025297045708, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.0647730827331543, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.059094756841659546, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.05499260872602463, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.037105146795511246, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.03613000735640526, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.03307793289422989, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.02857847698032856, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.025084074586629868, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.02421596273779869, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.01669948175549507, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.013792858459055424, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.013561559841036797, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.01126856729388237, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.010796998627483845, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.009179718792438507, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.009943880140781403, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.00809917040169239, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.007663300260901451, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.01669948175549507, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.01669948175549507, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.037418629974126816, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.03421865776181221, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.017161887139081955, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.01673400029540062, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.01645580679178238, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.007981427945196629, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.036898620426654816, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.03351333737373352, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.01748415268957615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.015980161726474762, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.016800131648778915, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.017674947157502174, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.01585400104522705, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.009646219201385975, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.007396853994578123, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.009800701402127743, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.007115859538316727, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.006393231451511383, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.007062036078423262, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.0062866914086043835, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.00684909475967288, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.00704865762963891, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.005756457801908255, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0062532066367566586, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.017161887139081955, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.017161887139081955, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.028624409809708595, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.027396799996495247, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.012605564668774605, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.01241437066346407, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.012294242158532143, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.006656958255916834, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.03157026320695877, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.027023915201425552, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.012632415629923344, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.012127122841775417, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.012659897096455097, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.014814065769314766, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.012067553587257862, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.0077308304607868195, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.006465129554271698, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.007904539816081524, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.00637996569275856, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.006021370179951191, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.006363769061863422, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.0059924982488155365, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.006389586254954338, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.006358997896313667, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.005918804556131363, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.005983768962323666, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.012605564668774605, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.012605564668774605, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.1515704095363617, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.1142626702785492, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.08563944697380066, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.07328784465789795, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.06249556317925453, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.042227428406476974, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.10909415781497955, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.09321533888578415, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.0734281837940216, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.052396852523088455, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.05471912771463394, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.0581849031150341, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.04663555324077606, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.03171435371041298, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.02582699805498123, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03189598768949509, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.016895025968551636, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.014211590401828289, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.014975565485656261, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.010613828897476196, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018595034256577492, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.013961877673864365, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.009030591696500778, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.007939312607049942, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.016895025968551636, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.016895025968551636, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.16542474925518036, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.12124495953321457, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.09745484590530396, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.08204258233308792, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.06880742311477661, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.0505678616464138, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.1029285117983818, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.09148325026035309, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.07793446630239487, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.05248141288757324, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.05106106027960777, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.05303698405623436, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.04453105479478836, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.034475453197956085, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.031729504466056824, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.027220139279961586, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.020005296915769577, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.019141152501106262, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.017271341755986214, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.015261121094226837, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.015633009374141693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.016091149300336838, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.012669707648456097, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.012719040736556053, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.017271341755986214, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.017271341755986214, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.17621555924415588, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.16426390409469604, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.15998026728630066, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.14348956942558289, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.08007147163152695, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.0768776386976242, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.09117129445075989, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.0838731899857521, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.08164379745721817, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.07303763180971146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.06717387586832047, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.04615342244505882, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04003363847732544, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.03835600987076759, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.03796156495809555, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.02313716523349285, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.020063169300556183, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.01993033103644848, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.018702713772654533, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.01806962490081787, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.012488706037402153, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.012739968486130238, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.011955524794757366, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.008976761251688004, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.01806962490081787, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.01806962490081787, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2122102975845337, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.1993441879749298, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.19479432702064514, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.17573945224285126, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.0976129025220871, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09418688714504242, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10978963971138, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10132507234811783, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09932203590869904, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08946268260478973, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08250557631254196, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.055635567754507065, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04823325574398041, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04656664654612541, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.04616532102227211, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.02775779739022255, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.023800181224942207, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.023676849901676178, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02214965410530567, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02140955440700054, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014441142790019512, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014215749688446522, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01388310082256794, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009100452065467834, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014441142790019512, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014441142790019512, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.193497434258461, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.17538665235042572, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.16799509525299072, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.1525888890028, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.08794218301773071, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.08063476532697678, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.10379897058010101, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.09469299763441086, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.09032458811998367, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.07752079516649246, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.07509470731019974, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.05293988063931465, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.04540679231286049, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.04234427958726883, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.04160016030073166, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.026669014245271683, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.022354451939463615, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.022080039605498314, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.020345360040664673, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.020030923187732697, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.014568276703357697, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.014637775719165802, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.013536748476326466, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.010502761229872704, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.014568276703357697, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.014568276703357697, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.040790461003780365, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.03546302020549774, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.024272048845887184, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.021719256415963173, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.01793026737868786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.011555744335055351, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.03376104682683945, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.03043464757502079, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.01922236569225788, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.016117263585329056, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.016506893560290337, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.01671406254172325, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.014468017965555191, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.009166195057332516, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.007417692802846432, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.0085323266685009, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.005529697053134441, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.004745753947645426, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.005238295067101717, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.00427250238135457, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.004888851195573807, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.004987061023712158, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.0034053348936140537, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0037930181715637445, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.01793026737868786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.01793026737868786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.02779269590973854, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.025031277909874916, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.015167178586125374, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.013968783430755138, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.012341354042291641, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.006611923687160015, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.025748154148459435, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.022845888510346413, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.01302194595336914, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.011551621370017529, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.011983949691057205, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.012425906956195831, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.010911432094871998, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.006251861806958914, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.004657125100493431, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.006213417276740074, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.003826298052445054, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.0031774952076375484, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.003699907101690769, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.0029600372072309256, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.003687808755785227, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.0036015864461660385, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.0026789468247443438, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.0027684252709150314, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.015167178586125374, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.015167178586125374, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.18465590476989746, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.15328799188137054, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.13356468081474304, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.11488160490989685, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.0811881273984909, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.06747696548700333, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.12628254294395447, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10389098525047302, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.0883227288722992, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.06830887496471405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.06577350944280624, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06686919182538986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05038635805249214, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.03935812786221504, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.036354534327983856, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03409691900014877, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.02043243683874607, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.019166335463523865, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.017785748466849327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.015231111086905003, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01753169856965542, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.014345376752316952, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.011292475275695324, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008633642457425594, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.017785748466849327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.017785748466849327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.18224020302295685, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.1461477428674698, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.13074228167533875, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.10553783178329468, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.08015379309654236, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.06804849952459335, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.10697837918996811, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.09464964270591736, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.08656441420316696, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.06206255778670311, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.057808905839920044, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.055406324565410614, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.04690457880496979, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.04038121923804283, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.03868507593870163, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.028741877526044846, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.02347385138273239, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02299841307103634, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.020407546311616898, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.018905527889728546, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016824154183268547, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.01820172742009163, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.014800125733017921, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.015046149492263794, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016824154183268547, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016824154183268547, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2236519753932953, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2098362296819687, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.205455020070076, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.18468821048736572, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.10451100766658783, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.10092481225728989, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.11815567314624786, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10830163210630417, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.10637792199850082, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.09553590416908264, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08794550597667694, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.060317765921354294, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.052914198487997055, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.051270741969347, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05088842287659645, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.030822530388832092, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.028430785983800888, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.028326157480478287, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.026798080652952194, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02612335793673992, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017811482772231102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.019945260137319565, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.017291177064180374, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.016226833686232567, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017811482772231102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017811482772231102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.24587851762771606, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.23087796568870544, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.22596099972724915, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.2032952755689621, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11466392874717712, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11071345210075378, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.12869638204574585, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11877116560935974, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11674540489912033, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10473272204399109, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09636107087135315, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06584690511226654, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05702194571495056, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.055248286575078964, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05480574071407318, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.0330096110701561, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.028987571597099304, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.0288773812353611, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.027039645239710808, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.026201307773590088, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01791742816567421, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018392333760857582, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01733287423849106, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.013183442875742912, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01791742816567421, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01791742816567421, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.04786253347992897, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.04471839219331741, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.03402847424149513, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.03168060630559921, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.025171339511871338, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.01838238351047039, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.052795566618442535, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.03755969554185867, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.025504760444164276, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.023793278262019157, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.023741137236356735, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.02329089492559433, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.021367549896240234, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.014673522673547268, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.01423281617462635, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.013775442726910114, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.012793389149010181, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.013869013637304306, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.012643614783883095, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.013683566823601723, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.013217072933912277, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.012411735951900482, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.014462396502494812, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.013352912850677967, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.01838238351047039, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, "best_option": { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.01838238351047039, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.05167527124285698, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.045966148376464844, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.04179873317480087, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.03677768260240555, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.023420222103595734, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.02069837599992752, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.03135772794485092, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.028634201735258102, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.024545514956116676, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.020626116544008255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.019253069534897804, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.015902183949947357, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.013685530982911587, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.011329933069646358, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.010723619721829891, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.00792867224663496, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.005966358818113804, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.0056954603642225266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.005418106913566589, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.004902209620922804, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.004179780837148428, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.004141275770962238, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.003400759305804968, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0027646857779473066, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.015902183949947357, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.015902183949947357, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.04879956692457199, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.04268066957592964, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.037850521504879, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.033197227865457535, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.02175910212099552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.01868073083460331, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.03034919686615467, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.027702130377292633, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.022958768531680107, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.018965154886245728, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.017962465062737465, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.015230149030685425, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.013188385404646397, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.010509115643799305, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.009761268272995949, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.00764027563855052, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.0054747010581195354, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.005140858702361584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.0049290163442492485, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.004336697515100241, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.003952589817345142, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.003831533482298255, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.003057259600609541, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.002389395609498024, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.017962465062737465, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.017962465062737465, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.19948098063468933, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.1780809760093689, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.1689322143793106, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.14829352498054504, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.09084174782037735, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.08377164602279663, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.11295627057552338, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10087981820106506, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.09487523138523102, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.07948536425828934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.07328289747238159, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.05762658640742302, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.04808512702584267, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.04357409477233887, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.04246114194393158, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.028639424592256546, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.022172264754772186, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.0217851921916008, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.01983914151787758, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.018523236736655235, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.014603898860514164, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.013657679781317711, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.012554582208395004, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008190140128135681, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.018523236736655235, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.018523236736655235, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.18562321364879608, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.15612365305423737, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.14636193215847015, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.12103327363729477, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.08076733350753784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.07298630475997925, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.10013926029205322, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.09130190312862396, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.08815041184425354, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.06787904351949692, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.05993982031941414, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.051234520971775055, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.044099148362874985, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.03936624154448509, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.0381619818508625, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.025845564901828766, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.021213991567492485, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.020995432510972023, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.018759122118353844, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.01739063300192356, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.014106735587120056, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.014697793871164322, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.012619521468877792, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.010883650742471218, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.01739063300192356, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.01739063300192356, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.23895478248596191, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.22351275384426117, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.21815775334835052, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.19544458389282227, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11162415891885757, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.10728628188371658, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.12602731585502625, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11624026298522949, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11394437402486801, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10134368389844894, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09274012595415115, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06418294459581375, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.055620402097702026, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05353105813264847, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05304175987839699, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.032106056809425354, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02752098999917507, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.027378683909773827, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02541925758123398, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.024497665464878082, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01693081296980381, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01670014299452305, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.016246864572167397, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010980273596942425, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01693081296980381, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01693081296980381, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2604825794696808, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.24370554089546204, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.2380376160144806, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.21357814967632294, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.12176807969808578, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11703289300203323, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.1373552680015564, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12659098207950592, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1241847425699234, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11056827008724213, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.101253941655159, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06991779059171677, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.060571130365133286, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05835912749171257, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05783981829881668, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03497815504670143, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02993033453822136, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.029766300693154335, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02763722650706768, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.026638416573405266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018407518044114113, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018026230856776237, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01767915114760399, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011656664311885834, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018407518044114113, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018407518044114113, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.24043232202529907, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.2093001753091812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.1972164809703827, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.1752241849899292, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.11009415239095688, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.09714580327272415, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.13276241719722748, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12036368995904922, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.11414580047130585, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.09171132743358612, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.08797445148229599, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.06752276420593262, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.05784640461206436, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05319178104400635, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.052052341401576996, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.0340491458773613, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.028120798990130424, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.02771717682480812, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.024567799642682076, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.024028783664107323, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.018670665100216866, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.018575215712189674, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017098840326070786, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.013235227204859257, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.018575215712189674, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.018575215712189674, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.07323497533798218, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.06659331917762756, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.06239449605345726, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.05516788363456726, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.03365042433142662, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.030810615047812462, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.043082352727651596, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.038869038224220276, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.03489275649189949, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.029995791614055634, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.027810653671622276, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.021849479526281357, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.01857728138566017, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.016240565106272697, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.015644438564777374, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.010908025316894054, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.008445980027318, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.00819102581590414, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.00770538067445159, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.0071419402956962585, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.005661108996719122, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.005566820502281189, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.004773684311658144, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0036439180839806795, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.01857728138566017, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.01857728138566017, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.07508902996778488, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.06715988367795944, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.06030618026852608, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.052973877638578415, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.033708348870277405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.029469773173332214, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.04627642408013344, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.042364396154880524, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.03528621420264244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.029853293672204018, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.028161996975541115, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.023327946662902832, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.020058756694197655, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.016241440549492836, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.015202632173895836, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.011690937913954258, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.008432338945567608, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.007958896458148956, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.007660393137484789, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.006812365725636482, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.006013466976583004, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.005807346198707819, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.004665660206228495, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.003612902481108904, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.016241440549492836, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.016241440549492836, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.21179381012916565, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.19216810166835785, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.18347422778606415, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.16254562139511108, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.09697939455509186, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.09056148678064346, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.11832433938980103, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10620029270648956, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.10026292502880096, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.08612433075904846, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.07892131060361862, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06027243658900261, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05071935057640076, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.04650503769516945, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.045477379113435745, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.030057059600949287, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.023662613704800606, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.023258820176124573, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.021416887640953064, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.02012883499264717, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.015329765155911446, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.014447798952460289, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.01345787663012743, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008602463640272617, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.015329765155911446, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.015329765155911446, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.2272782027721405, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.18347036838531494, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.1649255007505417, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.13824652135372162, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.10040499269962311, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.08509864658117294, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.12984830141067505, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.11777956038713455, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10878311097621918, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.07969237118959427, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.07267589867115021, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06671246141195297, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.0567208006978035, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.04881008341908455, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.04681084305047989, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.03348758444190025, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.026123758405447006, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.025536850094795227, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.02220209874212742, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.02014857716858387, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018256070092320442, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.018250390887260437, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.015666445717215538, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.013058114796876907, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018256070092320442, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.018250390887260437, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.23792964220046997, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.22251945734024048, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.21733886003494263, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.19490179419517517, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.1113358587026596, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.1070302277803421, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.12543904781341553, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11581749469041824, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1135798916220665, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10114094614982605, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.0924125462770462, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06395768374204636, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.055396631360054016, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.053403306752443314, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05292413383722305, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03197747468948364, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.027470745146274567, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.027327533811330795, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02537960186600685, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.024464869871735573, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016904184594750404, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.016660591587424278, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01623961701989174, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010937239974737167, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016904184594750404, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016904184594750404, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.26513028144836426, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2481461614370346, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.24231462180614471, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.21740096807479858, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.1239686831831932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11920523643493652, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.13966844975948334, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12890681624412537, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1264624297618866, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11258721351623535, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.10309464484453201, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.07096870988607407, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.061592187732458115, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05937359109520912, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05884777754545212, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03548326715826988, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.03030867502093315, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.03015800565481186, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02796187624335289, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02694077044725418, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018488600850105286, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.0180415827780962, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.017742542549967766, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011390500701963902, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018488600850105286, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018488600850105286, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.25009220838546753, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.2201365977525711, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.20867040753364563, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.18460068106651306, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.11561404913663864, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10285687446594238, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.13652339577674866, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12503276765346527, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.11935494840145111, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.09659862518310547, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09175670146942139, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.06963574141263962, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.060104165226221085, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.0558154322206974, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05477645993232727, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03494240343570709, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.029354332014918327, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.028994129970669746, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.025633664801716805, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.02520192228257656, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.018777770921587944, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.019090179353952408, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017380136996507645, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.01346561312675476, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017380136996507645, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017380136996507645, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.0667695701122284, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.06086475029587746, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.057316724210977554, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.050812993198633194, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.03082890436053276, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.02839840203523636, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.03862673044204712, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.035154566168785095, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.031886592507362366, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.027456598356366158, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.02533561736345291, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.01952255703508854, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.016786575317382812, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.014849834144115448, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.014361836947500706, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.009755312465131283, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.0076879896223545074, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.007474817335605621, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.007001426070928574, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.006507731042802334, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.005054732784628868, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.0049782912246882915, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.004363073036074638, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.003202626947313547, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.016786575317382812, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.016786575317382812, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.06760940700769424, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.06069566309452057, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.05606301128864288, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.04935648292303085, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.030749009922146797, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.027582606300711632, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.040557440370321274, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.03667735680937767, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.0319780595600605, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.027088705450296402, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.025514822453260422, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.02051367051899433, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.017486300319433212, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.014813821762800217, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.014119341969490051, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.010218665935099125, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.007630432490259409, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.007308933883905411, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.0069021196104586124, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.006289754994213581, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.005281064659357071, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.005031120963394642, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.004324481822550297, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.0031102083157747984, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.017486300319433212, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.017486300319433212, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.22100695967674255, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.20058783888816833, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.19266650080680847, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.17044700682163239, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.10148517787456512, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.09499488770961761, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.12195170670747757, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.11027207970619202, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.10482462495565414, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.09001801162958145, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.08260940760374069, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06233854964375496, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05271514505147934, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.04867568612098694, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.0476958341896534, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.031123196706175804, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.02474284917116165, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.02437029778957367, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.022314658388495445, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.021056286990642548, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01594037376344204, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.014970584772527218, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.014147679321467876, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008930291049182415, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01594037376344204, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01594037376344204, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.22782619297504425, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.19408296048641205, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.18064682185649872, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.14588208496570587, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.10274957120418549, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.09239131957292557, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.12718579173088074, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.11603082716464996, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10905562341213226, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.08423349261283875, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.07286298274993896, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.065587118268013, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.05644611269235611, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.0503869354724884, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.048872459679841995, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.03314526006579399, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.02762598730623722, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02716935984790325, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.024212893098592758, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.022425509989261627, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018199918791651726, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.019639942795038223, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.016211695969104767, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.015106480568647385, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018199918791651726, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018199918791651726, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.23688727617263794, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.22130513191223145, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.21604087948799133, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.1935681700706482, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11093620210886002, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.10653118789196014, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.12545374035835266, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11569253355264664, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11322690546512604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10060305893421173, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09208793193101883, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06384032219648361, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05539831519126892, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.053223755210638046, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05271467566490173, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.031946636736392975, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.0273958221077919, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.027240188792347908, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.025297001004219055, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.024372141808271408, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01679510809481144, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.016672207042574883, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.016074195504188538, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010974062606692314, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01679510809481144, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01679510809481144, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2685748338699341, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.25118348002433777, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.24540236592292786, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.22026492655277252, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.12578335404396057, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.120864637196064, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.1418038010597229, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.13091908395290375, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.12830068171024323, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11414723843336105, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.10453663021326065, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.07208498567342758, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.06260373443365097, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.06026509404182434, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05971819534897804, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03602222353219986, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.03076450526714325, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.03058772347867489, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.028363458812236786, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02731601893901825, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01867285929620266, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018307702615857124, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.017871104180812836, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011539102531969547, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018307702615857124, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018307702615857124, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.25508934259414673, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.22483088076114655, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.2132546454668045, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.19031761586666107, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.11804239451885223, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10513487458229065, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.13860101997852325, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12747661769390106, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12185122817754745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.09934265911579132, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09485990554094315, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07099239528179169, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.061419155448675156, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05699155107140541, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05593690648674965, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03571805730462074, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.029969440773129463, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.029583830386400223, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.02630510739982128, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.025823745876550674, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.019171006977558136, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.019478460773825645, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01772211864590645, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.013682866469025612, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01772211864590645, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01772211864590645, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.06077064573764801, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.054223671555519104, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.04858695715665817, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.04299312084913254, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.02765658125281334, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.024012839421629906, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.03877316042780876, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.0348312146961689, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.02897617407143116, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.024492157623171806, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.023175079375505447, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.01974065788090229, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.016765514388680458, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.0134319718927145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.01254250667989254, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.009868853725492954, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.0071443007327616215, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.006728834472596645, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.006524139549583197, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.0058326260186731815, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.00514732301235199, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.005117994733154774, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.003950158134102821, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.003451613476499915, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.016765514388680458, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.016765514388680458, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.06397560238838196, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.0560443215072155, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.048105016350746155, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.042227085679769516, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.028160136193037033, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.023449178785085678, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.04296274483203888, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.03843864053487778, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.029828602448105812, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.024908123537898064, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.024080924689769745, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.021559324115514755, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.018099099397659302, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01366856787353754, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.01240430399775505, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.010788335464894772, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.007182315457612276, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.006564240902662277, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.006529767531901598, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.005574838723987341, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.005481288768351078, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.005259690806269646, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.003831833368167281, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.003268665401265025, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.018099099397659302, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.018099099397659302, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.20098699629306793, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.18178245425224304, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.17303937673568726, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.1525053083896637, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.09237396717071533, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.08591090887784958, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.1152733862400055, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10189057886600494, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.09583041071891785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.08152416348457336, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.07495231926441193, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.0588710755109787, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.048748165369033813, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.044353630393743515, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.043314334005117416, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.029373347759246826, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.022618114948272705, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.022138137370347977, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.020333826541900635, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.019047904759645462, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.014902813360095024, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.013836415484547615, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.012826608493924141, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008237863890826702, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.014902813360095024, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.014902813360095024, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.19116640090942383, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.16374745965003967, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.15111353993415833, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.1278506964445114, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.08657433837652206, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.07696253061294556, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.1108848825097084, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.09922800213098526, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.09099076688289642, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.07184666395187378, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.06475820392370224, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.05730048567056656, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.04831927269697189, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.042676448822021484, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.041241541504859924, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.028954897075891495, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.023626942187547684, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.023125598207116127, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.02093041129410267, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.019489208236336708, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016118640080094337, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.017122210934758186, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.014295843429863453, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.013347726315259933, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016118640080094337, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016118640080094337, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.21977441012859344, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.20534829795360565, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.20037421584129333, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.17951470613479614, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.10283723473548889, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09875220060348511, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.11604978889226913, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10720830410718918, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.10495036840438843, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.09321112930774689, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08529741317033768, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05906175449490547, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05128191411495209, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.049286939203739166, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.048808760941028595, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.029510166496038437, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.025336911901831627, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.025197168812155724, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02338920533657074, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.022518133744597435, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015399187803268433, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01535479910671711, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01472042128443718, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010019446723163128, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015399187803268433, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015399187803268433, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2635166049003601, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.24626578390598297, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.24062275886535645, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.2160123884677887, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.12345230579376221, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11866230517625809, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.13911773264408112, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.1284773349761963, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.12594352662563324, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11207003146409988, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.102605439722538, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.07071103900671005, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.061450425535440445, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05913592502474785, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.058597661554813385, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.035335808992385864, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.030176104977726936, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.030014656484127045, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.027841243892908096, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.026807071641087532, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018268970772624016, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.017966290935873985, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.017464306205511093, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011293221265077591, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018268970772624016, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018268970772624016, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2577192783355713, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.22637324035167694, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.21451960504055023, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.18992379307746887, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.11950915306806564, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10622002184391022, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.14190764725208282, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12902414798736572, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.1233576089143753, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.09969564527273178, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09510693699121475, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07296475768089294, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06237515062093735, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05806366726756096, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05698659271001816, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03679642081260681, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.031000329181551933, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.030639424920082092, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.027212420478463173, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.026769891381263733, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.020175417885184288, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.020756442099809647, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01872510462999344, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.01534278690814972, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.01534278690814972, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.01534278690814972, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.08387460559606552, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.07658243924379349, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.0713297426700592, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.06345835328102112, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.03874193876981735, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.03530547022819519, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.04998071864247322, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.04538758471608162, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.04008126258850098, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.0346817821264267, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.032460421323776245, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.025458574295043945, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02181851491332054, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.01884469948709011, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.018093222752213478, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01280019897967577, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.010084927082061768, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.009754825383424759, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.009308912791311741, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.00864731427282095, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.006880063097923994, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.007023813668638468, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.005875651724636555, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0050345552153885365, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.018093222752213478, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.018093222752213478, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.07640736550092697, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.06896872073411942, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.0637039765715599, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.056201934814453125, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.034709006547927856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03107767552137375, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.04492589831352234, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.04144211485981941, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.03599495068192482, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.03081841953098774, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.028700267896056175, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.022778509184718132, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.019785020500421524, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.016676468774676323, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.015868907794356346, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.011379907839000225, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.008602513931691647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.00824414286762476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.007821928709745407, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.0071257040835917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.005920317489653826, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.005704816430807114, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.004904665984213352, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.003538705175742507, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.016676468774676323, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.016676468774676323, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.23897412419319153, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.22036553919315338, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.21331429481506348, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.18930460512638092, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.11096811294555664, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.10520406812429428, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.12939360737800598, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.11707746982574463, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.11377877742052078, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.09904935210943222, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.0906912311911583, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.0660463497042656, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05600113421678543, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.053059522062540054, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05233001708984375, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.033057279884815216, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.026864906772971153, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.02666827291250229, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.02440457232296467, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.023278528824448586, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016915496438741684, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.01585424691438675, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.015500754117965698, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.009457088075578213, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016915496438741684, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016915496438741684, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.17959070205688477, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.15740282833576202, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.1495659053325653, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.1242281123995781, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.08194155991077423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.07535754889249802, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.09852418303489685, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.08906134217977524, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.08563034981489182, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.0682787224650383, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.06063317507505417, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.05069394037127495, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.04305335134267807, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.03979308903217316, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.038998574018478394, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.025409778580069542, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.021181602030992508, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.020950788632035255, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.01852722093462944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.01744760200381279, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.013665542006492615, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.014163418672978878, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.012539383955299854, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.010341083630919456, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.01852722093462944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.01852722093462944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.19812723994255066, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.1854231208562851, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.18058134615421295, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.16199782490730286, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09289845079183578, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.0889785885810852, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10598478466272354, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09750998765230179, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09476631134748459, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08433600515127182, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07739708572626114, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.0539734847843647, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04673818126320839, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04462037235498428, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.04410674795508385, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.026960963383316994, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02305891178548336, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.022878188639879227, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02132684551179409, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.020506657660007477, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014101906679570675, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014240133576095104, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.013328838162124157, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009521900676190853, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014101906679570675, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014101906679570675, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.25232869386672974, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2364559918642044, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.23043984174728394, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.20731329917907715, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11838951706886292, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11364418268203735, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.13490566611289978, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12402253597974777, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1208348423242569, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10773613303899765, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09914252161979675, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.0686986893415451, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05934600159525871, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05680585280060768, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05619558319449425, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03433503583073616, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02911602519452572, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.028903784230351448, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.026914268732070923, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02588820829987526, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017825504764914513, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.017582565546035767, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.016874399036169052, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011299675330519676, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017825504764914513, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017825504764914513, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.0776955634355545, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.06778603792190552, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.060056108981370926, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.05343778803944588, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.03596542775630951, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.02961646020412445, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.04973512142896652, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.04501104727387428, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.037378787994384766, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.030424073338508606, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.02955963835120201, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.025061381980776787, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.021897170692682266, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.01786804012954235, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.016571180894970894, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.01329212635755539, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.009736159816384315, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.0089726522564888, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.008707622066140175, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.007808191701769829, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.0075769503600895405, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.007264985702931881, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.005827795714139938, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.004935271106660366, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.01786804012954235, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.01786804012954235, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.07782578468322754, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.07102957367897034, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.06109463796019554, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.054528240114450455, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.035899605602025986, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.02974778413772583, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.05710040405392647, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.04909852519631386, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.03731076419353485, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.032545626163482666, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.03216150030493736, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.029854826629161835, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02373654581606388, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.017531171441078186, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.015711070969700813, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.014970916323363781, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.009331106208264828, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.008390149101614952, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.008681903593242168, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.007390749175101519, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.007762791123241186, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.007058116607367992, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.005184692330658436, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.004527516663074493, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.017531171441078186, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.017531171441078186, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.07482267916202545, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.0681774765253067, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.054717183113098145, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.049004361033439636, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.03397195413708687, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.025991952046751976, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.05852639302611351, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.05103476345539093, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.035371311008930206, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.031021475791931152, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.031477462500333786, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.030398976057767868, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.024433890357613564, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01653187908232212, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.014082328416407108, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.015172768384218216, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.00871698185801506, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.007393583655357361, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.008132752031087875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.006429068278521299, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.007776862476021051, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.006895194295793772, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.0046309251338243484, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.004034081008285284, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01653187908232212, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01653187908232212, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.20074434578418732, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.18160612881183624, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.1698680967092514, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.1503070741891861, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.09270670264959335, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.08419041335582733, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.13479188084602356, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10720864683389664, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.09661272168159485, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.08235964924097061, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.07834476977586746, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.07159778475761414, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.0519506111741066, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.04465509206056595, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.04276752844452858, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03603076562285423, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.022844145074486732, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.021995116025209427, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.020721998065710068, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.018951361998915672, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018172193318605423, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.014726984314620495, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.012899992987513542, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.00872267410159111, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018172193318605423, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018172193318605423, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.20312316715717316, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.17190220952033997, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.15589958429336548, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.13277074694633484, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.0921405553817749, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.08021234720945358, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.1214168593287468, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.1092396080493927, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.09703516960144043, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.07622654736042023, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.0700179785490036, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06271671503782272, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.053002603352069855, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.04536398872733116, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.04338924214243889, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.031695496290922165, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.024989333003759384, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02422214113175869, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.022052794694900513, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.02026715688407421, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.017507290467619896, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.01822475530207157, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.01500028744339943, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.013887443579733372, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.017507290467619896, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.017507290467619896, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2185717225074768, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.20494624972343445, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.20037905871868134, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.18013349175453186, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.10248716175556183, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09860630333423615, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.11551687121391296, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10653248429298401, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.10452829301357269, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.09324032068252563, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08555958420038223, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05881969630718231, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.050977304577827454, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.049118395894765854, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.048683688044548035, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.029405390843749046, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02523154392838478, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02510780841112137, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.023332824930548668, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.022516746073961258, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015411026775836945, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.015237453393638134, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.014774145558476448, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.00991768017411232, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015411026775836945, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015411026775836945, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2669321596622467, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2503944933414459, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.24494680762290955, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.22039593756198883, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.12545795738697052, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.1208667904138565, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.14120936393737793, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.13007035851478577, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1278909593820572, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11421258747577667, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.10500364005565643, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.0721132755279541, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.06238026171922684, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.06028657779097557, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05976521223783493, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03610794618725777, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.03114183247089386, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.031006813049316406, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02885989472270012, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02787870168685913, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.019143320620059967, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.019047845155000687, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01843421906232834, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.012792154215276241, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01843421906232834, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01843421906232834, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2525647282600403, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.22114746272563934, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.20793628692626953, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.18460753560066223, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.11681418120861053, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10267439484596252, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.14166691899299622, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12893794476985931, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12099816650152206, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.09776953607797623, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09370040893554688, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07291020452976227, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06243586912751198, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.0566733255982399, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05522209033370018, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03682146966457367, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.030161771923303604, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.02960359863936901, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.02650297060608864, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.025789882987737656, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.01997300609946251, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.020306304097175598, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017948195338249207, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.014618287794291973, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017948195338249207, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017948195338249207, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.08288468420505524, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.07580557465553284, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.07136540114879608, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.06351900100708008, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.038291577249765396, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.03523882478475571, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.04856337234377861, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.04385913163423538, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.039468150585889816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.03428732976317406, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.03205396980047226, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.024708811193704605, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.020988067612051964, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.018479827791452408, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.017856981605291367, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.012388940900564194, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.009670811705291271, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.009388134814798832, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.008870111778378487, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.008275610394775867, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.006516005378216505, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.00640101870521903, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.005609055981040001, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.004288644064217806, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.018479827791452408, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.018479827791452408, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.0707879438996315, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.0642148032784462, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.05896366760134697, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.05230674892663956, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.0322505384683609, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.028864674270153046, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.04284292459487915, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.038979578763246536, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.033428579568862915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.02880004793405533, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.0269760899245739, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.02153400145471096, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.01858477294445038, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.015501177869737148, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.014706499874591827, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.010753186419606209, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.007969068363308907, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.007572133094072342, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.007266236934810877, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.006546279415488243, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.005519118160009384, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.005287183448672295, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.004424711223691702, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.0031430928502231836, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.015501177869737148, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.015501177869737148, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.2146962285041809, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.19786620140075684, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.1913551390171051, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.17002980411052704, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.09920322895050049, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.09384682774543762, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.11735951900482178, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10578002780675888, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.10200946778059006, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.08875273913145065, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.08165819942951202, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06017822399735451, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.050598789006471634, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.04754716530442238, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.04676518961787224, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.030194491147994995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.023999502882361412, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.023733049631118774, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.02179277502000332, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.02074000984430313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01535507757216692, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.01415094081312418, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.013793012127280235, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008250923827290535, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01535507757216692, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01535507757216692, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.23219110071659088, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.20286929607391357, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.19170017540454865, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.1634770780801773, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.10499465465545654, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.09595867246389389, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.12647750973701477, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.11592815816402435, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10996680706739426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.0890091210603714, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.07905448973178864, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06459756940603256, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.05551792308688164, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.05056747794151306, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.04930835962295532, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.032294150441884995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.026309192180633545, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02590632624924183, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.023103073239326477, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.021564945578575134, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016995234414935112, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.016919013112783432, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.01535056158900261, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.01132791955024004, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016995234414935112, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016995234414935112, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2009706199169159, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.18816827237606049, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.1836695522069931, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.16531695425510406, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09413418173789978, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.0904185026884079, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10633309185504913, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09829081594944, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09602973610162735, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08555209636688232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.0785582885146141, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05416681990027428, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.047054458409547806, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.045135363936424255, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.04470640793442726, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.027076540514826775, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02320808917284012, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.023060297593474388, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.021458500996232033, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.020673464983701706, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014145760796964169, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014082466252148151, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.013492058031260967, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009198029525578022, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014145760796964169, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014145760796964169, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2599634528160095, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.24363423883914948, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.23808275163173676, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.21431486308574677, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.12171933799982071, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11710329353809357, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.13716202974319458, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12668254971504211, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1241946667432785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11083609610795975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.10177850723266602, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.069761723279953, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.0605628676712513, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05833596736192703, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05780152231454849, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03482336178421974, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.029785240069031715, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02962440438568592, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.027524257078766823, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02652619779109955, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018000470474362373, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01773393712937832, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01722678355872631, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011178629472851753, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018000470474362373, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018000470474362373, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.264125257730484, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.22965259850025177, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.21671777963638306, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.1907091736793518, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.12199351191520691, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10747534036636353, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.14546935260295868, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.13242121040821075, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12639445066452026, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.10051429271697998, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.0957312360405922, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07469859719276428, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06388676166534424, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05910537391901016, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05794588848948479, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.037712957710027695, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.03135894984006882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.030965568497776985, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.027196714654564857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.02669408917427063, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.020565122365951538, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.020776230841875076, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.018997978419065475, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015021412633359432, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015021412633359432, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015021412633359432, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.08422723412513733, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.07753846049308777, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.07351589947938919, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.06558804959058762, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.03903986141085625, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.036322616040706635, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.04842653125524521, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.04397911950945854, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.04018928110599518, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.0351191908121109, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.03278316557407379, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.02462443895637989, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.021053645759820938, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.018876895308494568, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.018330173566937447, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.012337195686995983, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.009894290938973427, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.009635694324970245, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.009104136377573013, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.008551155216991901, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.0064972941763699055, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.0065118661150336266, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.005716334562748671, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.004424331244081259, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.018330173566937447, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.018330173566937447, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.08026231825351715, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.07314218580722809, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.06808122992515564, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.060353800654411316, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.03664788603782654, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03318144381046295, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.047009311616420746, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.04308208078145981, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.03797987848520279, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.03276780620217323, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.030504845082759857, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.023874061182141304, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.020613234490156174, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01763668656349182, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.01684427075088024, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01191160548478365, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.009011122398078442, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.008640028536319733, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.00821555033326149, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.007506420370191336, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.006136863958090544, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.0058549921959638596, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.00507253548130393, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.003498346544802189, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01763668656349182, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01763668656349182, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.23142355680465698, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.21275100111961365, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.20532609522342682, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.18288537859916687, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.10716258734464645, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.10125650465488434, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.12876325845718384, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.11517245322465897, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.1103052943944931, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.0961274802684784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.08890929073095322, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06611368805170059, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.055070310831069946, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.05136388912796974, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05046825483441353, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.033284690231084824, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.025970883667469025, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.02564302273094654, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.023596232756972313, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.02240835502743721, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01700415089726448, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.015486245974898338, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.015008462592959404, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.009074336849153042, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01700415089726448, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01700415089726448, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.2401614636182785, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.21454884111881256, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.20489609241485596, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.17608599364757538, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.1095656231045723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.1016768366098404, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.1312052458524704, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.11910377442836761, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.11393040418624878, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.09480789303779602, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.08446808159351349, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06736946851015091, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.05743173509836197, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.05299653857946396, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.051922623068094254, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.03383155167102814, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.02807372249662876, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.027704276144504547, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.025177275761961937, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.023761801421642303, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018299033865332603, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.018535412847995758, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.016832901164889336, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.013314464129507542, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018299033865332603, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018299033865332603, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.20615744590759277, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.19316597282886505, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.1885974407196045, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.16965283453464508, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09647153317928314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09278114140033722, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.1087745726108551, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10044806450605392, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09834209084510803, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08782349526882172, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08061838895082474, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05542440339922905, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04807731881737709, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.046267688274383545, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.045848824083805084, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.027735212817788124, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.023813528940081596, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02367401495575905, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.022042986005544662, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02125815488398075, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014542767778038979, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014461174607276917, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.013914831914007664, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009493343532085419, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014542767778038979, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014542767778038979, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2614828050136566, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.24532465636730194, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.24001912772655487, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.2161504626274109, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.12265806645154953, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11813727766275406, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.13781912624835968, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.1271727979183197, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.12501990795135498, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11171816289424896, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.10266847163438797, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.07022889703512192, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.060835376381874084, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.0587574727833271, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05826609581708908, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03508296608924866, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.030071571469306946, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02993715927004814, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02781904861330986, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.026855191215872765, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01828853227198124, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01798088289797306, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.017571544274687767, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011535677127540112, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01828853227198124, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01828853227198124, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.27199587225914, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.23954758048057556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.22732208669185638, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.20095761120319366, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.126383438706398, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.1126306802034378, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.1489320695400238, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.1364748179912567, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.1304749846458435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.1054944097995758, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.10042847692966461, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07655633985996246, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06580478698015213, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.06120964512228966, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.06008732691407204, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.038569483906030655, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.0324164479970932, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.03202987462282181, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.02834380604326725, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.02787092886865139, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.021082889288663864, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.02133786678314209, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01955636776983738, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015348692424595356, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015348692424595356, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015348692424595356, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.10052758455276489, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.09118855744600296, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.08546284586191177, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.07547856867313385, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.04620230197906494, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.04231356084346771, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.05928529053926468, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.05314573645591736, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.04787538945674896, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.04097406938672066, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.038360435515642166, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.030274778604507446, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.025499310344457626, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.022382309660315514, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.021579891443252563, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015273863449692726, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.011820007115602493, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.0114643145352602, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.010802106000483036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.01004001684486866, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.008081161417067051, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.007994301617145538, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.006866979878395796, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.00553120719268918, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015273863449692726, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015273863449692726, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.09240996837615967, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.08281392604112625, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.07688681781291962, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06759893149137497, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.04185478016734123, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.037868935614824295, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.0535842590034008, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.049031730741262436, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.043575532734394073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.03696522116661072, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03422386571764946, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.027333997189998627, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.02350934036076069, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.020193839445710182, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.019336577504873276, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013651666231453419, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.010364841669797897, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.009984632022678852, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.009365358389914036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.008563157171010971, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.007009219378232956, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.006748532876372337, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.005799272563308477, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.00415604654699564, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013651666231453419, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013651666231453419, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.2151636928319931, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.1993454545736313, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.19298595190048218, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.17132899165153503, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.09980317950248718, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.09487093240022659, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.11703996360301971, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.10619230568408966, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.10240572690963745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.08967678993940353, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.08206755667924881, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.05966504290699959, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05068381875753403, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.047801535576581955, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.047113578766584396, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.029823211953043938, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.024165883660316467, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.023915819823741913, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.022052982822060585, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.02098725363612175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01521419920027256, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.014277598820626736, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.013905111700296402, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008366729132831097, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01521419920027256, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01521419920027256, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.23094382882118225, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.20529335737228394, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.19449543952941895, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.1677083522081375, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.10551992058753967, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.09715679287910461, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.12886498868465424, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.11696158349514008, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10985907912254333, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.09112299233675003, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.08211268484592438, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06602565944194794, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.056194327771663666, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.05099426209926605, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.04970040172338486, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.033118836581707, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.026783347129821777, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.026338765397667885, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.023944560438394547, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.022424886003136635, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01769491285085678, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.017598966136574745, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.015991168096661568, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.012229769490659237, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01769491285085678, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01769491285085678, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.20427870750427246, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.19113610684871674, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.1865369826555252, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.16762088239192963, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.0958712100982666, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09205042570829391, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.1085156723856926, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10011317580938339, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09777555614709854, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08710137754678726, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07994356006383896, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.055457666516304016, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04799116030335426, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.0460764542222023, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.045634035021066666, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.027750618755817413, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.023846223950386047, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.023708796128630638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.022068675607442856, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.021272290498018265, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014715386554598808, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014703408814966679, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.014066551811993122, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009922328405082226, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014715386554598808, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014715386554598808, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.26540523767471313, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.24858328700065613, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.24328279495239258, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.2184782475233078, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.12467412650585175, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.1199759915471077, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.14044378697872162, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.1294286996126175, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1271088868379593, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11330686509609222, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.10396143049001694, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.07168206572532654, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.061985380947589874, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05981304496526718, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05932236462831497, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03583113104104996, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.03069937601685524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.030549153685569763, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.028372149914503098, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.027364304289221764, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018784107640385628, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018513621762394905, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.018039768561720848, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.012050348334014416, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018513621762394905, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.018513621762394905, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.278509259223938, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.24641668796539307, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.2341974824666977, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.2058812826871872, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.12973564863204956, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.11593468487262726, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.15267562866210938, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.13998110592365265, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.13365061581134796, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.1082446426153183, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.10247629880905151, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07814832776784897, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.0674043744802475, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.06268701702356339, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.06154632568359375, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03924864903092384, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.03290930017828941, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.03250858187675476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.028683120384812355, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.028212418779730797, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.02107423171401024, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.021318724378943443, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01953743025660515, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.014949865639209747, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.014949865639209747, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.014949865639209747, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.10533785074949265, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.09709476679563522, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.09175943583250046, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.08131306618452072, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.0489036925137043, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.04531659558415413, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.06119195371866226, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.055494341999292374, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.050270237028598785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.043875716626644135, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.04058479145169258, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.031134160235524178, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02654246613383293, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.02357986755669117, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.022841710597276688, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015567988157272339, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.012283171527087688, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.01194814033806324, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.01127624697983265, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.010542006231844425, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.008130088448524475, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.008034929633140564, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.007079704664647579, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.005349788349121809, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015567988157272339, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015567988157272339, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.08119229227304459, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.07481244206428528, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.06907036155462265, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06121115759015083, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.0373246856033802, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03366665169596672, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.048594504594802856, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.044912658631801605, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.03833598271012306, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.033631786704063416, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03146008774638176, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.02459622547030449, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.021413056179881096, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01802005246281624, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.017111821100115776, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.012265348806977272, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.009262616746127605, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.008826758712530136, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.008515967056155205, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.007753821089863777, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.0063347527757287025, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.006173167377710342, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.00521814962849021, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.0038160604890435934, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01802005246281624, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01802005246281624, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.25495636463165283, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.23500458896160126, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.22896414995193481, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.20282140374183655, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.11868467181921005, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.11299581825733185, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.13809455931186676, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.1243353933095932, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.121556356549263, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.10601375997066498, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.09674686193466187, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.07087880373001099, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05947958678007126, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.056756358593702316, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05609516799449921, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.0355205312371254, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.028602359816432, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.028409678488969803, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.025899944826960564, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.024806540459394455, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018070898950099945, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.016596918925642967, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.016500340774655342, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.009652145206928253, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018070898950099945, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018070898950099945, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.2445531040430069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.21951152384281158, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.2102937549352646, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.1815250962972641, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.11134698241949081, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.10399111360311508, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.13260015845298767, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.12072626501321793, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.1159881055355072, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.09727279096841812, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.08671479672193527, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06813937425613403, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.05817187950015068, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.053977496922016144, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.05290234088897705, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.03427741304039955, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.028622055426239967, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.028324585407972336, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.02586493454873562, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.024517308920621872, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018557578325271606, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.0189231988042593, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.01719040237367153, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.013695762492716312, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018557578325271606, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018557578325271606, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.185102641582489, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.17297662794589996, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.16834357380867004, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.15119323134422302, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.0867203027009964, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.08304210752248764, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.09926731139421463, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09142503142356873, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.08860784024000168, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.07877270877361298, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07239147275686264, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05063508450984955, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04384776949882507, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04173332080245018, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.041226308792829514, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.025346718728542328, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.021687617525458336, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.021517403423786163, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02007908932864666, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.019314860925078392, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013423731550574303, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01357551570981741, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.012695695273578167, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009286079555749893, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013423731550574303, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013423731550574303, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2585902810096741, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.24192234873771667, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.236167311668396, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.2121437042951584, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.1213783472776413, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11648301035165787, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.1376987248659134, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12676185369491577, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.12383105605840683, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.11018068343400955, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.10111676901578903, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.0700870007276535, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.060636281967163086, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.058156512677669525, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.057572752237319946, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.034995995461940765, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.029742922633886337, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.0295656006783247, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.0274440199136734, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02641116827726364, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018086042255163193, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.017840567976236343, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.017179744318127632, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011340315453708172, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018086042255163193, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.018086042255163193, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2689926326274872, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.2355649173259735, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.22256948053836823, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.19531738758087158, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.12506793439388275, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.11062996834516525, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.15012265741825104, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.13579174876213074, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12914103269577026, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.10328224301338196, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09796953201293945, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07668344676494598, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06555280089378357, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.060688309371471405, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05949106812477112, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.038605157285928726, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.03231900557875633, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.031894534826278687, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.028123382478952408, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.027619624510407448, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.021093610674142838, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.021564176306128502, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01950937695801258, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.01575227826833725, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.01575227826833725, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.01575227826833725, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.1034003421664238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.0952129065990448, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.0903434008359909, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.08027687668800354, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.04801972582936287, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.04460681229829788, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.059957265853881836, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.053768716752529144, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.04931880533695221, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.04292623698711395, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.039862290024757385, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.030466627329587936, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02570669911801815, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.023144178092479706, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.0225101038813591, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015231003053486347, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.01202636118978262, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.01174851879477501, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.011008932255208492, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.010338982567191124, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.007930243387818336, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.007773835211992264, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.006927092559635639, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0051575335673987865, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015231003053486347, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.015231003053486347, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.0883515477180481, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.08091650903224945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.07575777173042297, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06680367887020111, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.040336333215236664, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03675410896539688, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.05116438865661621, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.04706532135605812, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.04156993702054024, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.03604919835925102, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03339690342545509, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.025965042412281036, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.02245284989476204, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.01938476786017418, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.018594592809677124, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01295370515435934, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.009934448637068272, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.009544476866722107, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.00904869008809328, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.008306555449962616, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.006636309437453747, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.006397318560630083, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.005577175412327051, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.0038828931283205748, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01295370515435934, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01295370515435934, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.2292148470878601, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.21212896704673767, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.2054663449525833, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.1822773665189743, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.10678738355636597, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.10132624208927155, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.12685050070285797, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.11319801211357117, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.10932386666536331, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.09565916657447815, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.08819898962974548, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06528536975383759, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05407600477337837, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.05107859894633293, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.050336070358753204, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03262455016374588, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.025844236835837364, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.02557806670665741, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.023502785712480545, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.022400522604584694, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016644475981593132, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.015233644284307957, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.014963570982217789, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.008954539895057678, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016644475981593132, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016644475981593132, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.21731019020080566, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.19396373629570007, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.18492788076400757, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.16001130640506744, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.09933342784643173, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.09199158102273941, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.11821334809064865, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.10816904902458191, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10297189652919769, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.08533008396625519, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.07761155813932419, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06058024242520332, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.051963672041893005, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.047911640256643295, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.04692821204662323, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.03038577362895012, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.02514350414276123, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.024793406948447227, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.02239696867763996, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.021137850359082222, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01620662212371826, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.016355542466044426, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.014879189431667328, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.011383822187781334, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01620662212371826, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01620662212371826, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.18635791540145874, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.17434507608413696, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.16952823102474213, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.1525329053401947, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.08744893968105316, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.08369635045528412, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10007233917713165, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09220510721206665, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.08930587023496628, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.0795343741774559, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07320482283830643, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05106902867555618, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04422736167907715, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04208933934569359, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.04156942665576935, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.025554334744811058, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.021881388500332832, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02169991284608841, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.020294038578867912, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.019522003829479218, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013480699621140957, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.013721924275159836, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.012734588235616684, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009406612254679203, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013480699621140957, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013480699621140957, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2515890598297119, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.23570778965950012, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.23001575469970703, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.20678165555000305, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11816681921482086, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.1134384274482727, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.13424305617809296, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12342024594545364, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1205473467707634, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10750199109315872, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09883567690849304, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06851053982973099, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05915619060397148, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05673529952764511, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05615474283695221, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.034253157675266266, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.029163729399442673, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.028968647122383118, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.026979658752679825, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.025976475328207016, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01796712726354599, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.017715858295559883, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.017098475247621536, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011581133119761944, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01796712726354599, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01796712726354599, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2669700086116791, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.23262932896614075, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.21890343725681305, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.1919213831424713, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.12379363179206848, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10852765291929245, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.1488180309534073, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.13516801595687866, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12821543216705322, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.10166028887033463, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09669187664985657, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07659517973661423, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06526357680559158, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.06004204601049423, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05875426158308983, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03857685253024101, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.03191733360290527, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.03146815672516823, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.02764926292002201, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.027084659785032272, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.020937008783221245, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.021274732425808907, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.019185151904821396, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015444843098521233, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015444843098521233, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015444843098521233, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.0914701297879219, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.08383642882108688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.07923587411642075, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.07023727893829346, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.04220469295978546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.039072662591934204, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.05331847071647644, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.04784121364355087, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.04356464743614197, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.03780915588140488, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.03512846305966377, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.02704157866537571, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02291143871843815, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.02038852497935295, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.019742904230952263, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.013543362729251385, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.010675598867237568, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.010397648438811302, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.009797566570341587, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.009159336797893047, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.007136333268135786, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.007052417378872633, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.006161304656416178, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.004783615004271269, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.013543362729251385, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.013543362729251385, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.08772552758455276, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.08019088208675385, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.0747118815779686, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06578376889228821, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.04005058482289314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03637610748410225, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.051812030375003815, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.047025613486766815, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.04128769040107727, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.0357942171394825, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03334372118115425, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.02624059095978737, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.022526662796735764, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.019298795610666275, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.0184464231133461, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.0131407231092453, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.00992895383387804, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.009552648290991783, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.009082913398742676, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.008306046947836876, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.006749467924237251, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.00654053408652544, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.005611704662442207, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.0040666405111551285, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.0184464231133461, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.0184464231133461, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.21967890858650208, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.20228450000286102, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.1942940503358841, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.17259757220745087, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.10175573825836182, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.09557653963565826, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.12578898668289185, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.11039109528064728, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.10459171235561371, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.09106624126434326, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.08491253107786179, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.0645589828491211, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05280279740691185, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.0486513152718544, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.04763993248343468, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03212050721049309, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.024752570316195488, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.024335172027349472, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.022541724145412445, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.021308455616235733, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016367686912417412, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.015028931200504303, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.014171285554766655, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.00900967326015234, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016367686912417412, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.016367686912417412, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.21396510303020477, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.18883058428764343, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.17441973090171814, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.15028516948223114, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.09545327723026276, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.08562273532152176, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.12661388516426086, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.1137416735291481, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10145457834005356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.08366265892982483, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.07737608253955841, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.0648297369480133, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.05522088333964348, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.0469767227768898, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.04485016688704491, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.03281029313802719, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.02593674138188362, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02514323778450489, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.02372685633599758, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.02192680351436138, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018117481842637062, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.019008196890354156, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.015585271641612053, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.014469177462160587, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018117481842637062, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.018117481842637062, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.1918306201696396, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.17963570356369019, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.17476508021354675, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.15739689767360687, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.0899774432182312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.08616667985916138, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10300999879837036, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09484218806028366, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09182735532522202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08188708871603012, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07547503709793091, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05263273045420647, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04552002251148224, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04329294338822365, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.04276340454816818, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.026345273479819298, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02254064381122589, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.022366086021065712, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02093648351728916, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.020150968804955482, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013933928683400154, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014190063811838627, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.013156137429177761, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.00978503655642271, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013933928683400154, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013933928683400154, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.24933023750782013, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.23406653106212616, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.22824253141880035, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.20555613934993744, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11699716001749039, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11235684901475906, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.1329493373632431, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12238234281539917, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11932185292243958, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.1065271720290184, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09802796691656113, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06773503124713898, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.0585460364818573, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05608990788459778, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05552378296852112, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.033818017691373825, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.028719870373606682, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.028529813513159752, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.0265672504901886, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.025574512779712677, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017516251653432846, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.017281167209148407, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.016610685735940933, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011051141656935215, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017516251653432846, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017516251653432846, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.27398157119750977, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.2374899834394455, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.22319893538951874, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.19531355798244476, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.12715472280979156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.11141331493854523, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.15323778986930847, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.1388811618089676, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.13179150223731995, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.1037856861948967, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09890400618314743, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07882484048604965, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06720676273107529, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.06179344654083252, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.06045081838965416, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.039805296808481216, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.032996244728565216, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.03252584487199783, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.02850428782403469, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.0279066264629364, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.021782344207167625, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.022177107632160187, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01993873342871666, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.016252821311354637, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.016252821311354637, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.016252821311354637, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.11138342320919037, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.10220823436975479, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.09608649462461472, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.08554921299219131, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.05154147744178772, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.04737614840269089, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.06619630008935928, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.05926002934575081, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.05309631675481796, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.046251799911260605, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.043610308319330215, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.03376747667789459, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02848837897181511, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.024916794151067734, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.024025525897741318, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016932975500822067, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.013098307885229588, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.012677372433245182, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.012061749584972858, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.011232880875468254, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.008899924345314503, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.008772839792072773, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.0075738257728517056, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.005967267788946629, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016932975500822067, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016932975500822067, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.09135255962610245, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.08305193483829498, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.07667828351259232, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06791587173938751, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.041637662798166275, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03734886273741722, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.0547490268945694, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.04976751655340195, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.0430418998003006, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.037193719297647476, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03481792286038399, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.02764100395143032, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.02387370727956295, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.019975224509835243, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.019011419266462326, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013819126412272453, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.01038688700646162, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.009895781986415386, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.009503927081823349, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.008650529198348522, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.0072257970459759235, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.006969847716391087, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.005911092273890972, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.004388066940009594, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013819126412272453, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013819126412272453, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.22370971739292145, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.20468583703041077, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.19306719303131104, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.172208771109581, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.10335417091846466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.09488523751497269, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.13365966081619263, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.1174527257680893, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.10666721314191818, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.09255466610193253, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.08839823305606842, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06958716362714767, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.05647723376750946, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.04960767179727554, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.04790278524160385, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03516813740134239, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.025353966280817986, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.024523979052901268, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.023098798468708992, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.02138647809624672, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01794210448861122, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.016018420457839966, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.014556785114109516, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.009529726579785347, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01794210448861122, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.01794210448861122, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.21339060366153717, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.193411722779274, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.1863357126712799, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.16293972730636597, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.09740103781223297, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.09190396219491959, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.11426423490047455, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.10395738482475281, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10027637332677841, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.08581775426864624, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.07780348509550095, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.058404240757226944, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.04985695704817772, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.04685468226671219, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.046121641993522644, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.029228130355477333, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.024405090138316154, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02416626550257206, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.022117307409644127, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.021090582013130188, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01546215359121561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.015489639714360237, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.014487620443105698, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.010627519339323044, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01546215359121561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01546215359121561, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.19472761452198029, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.1819058507680893, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.17668914794921875, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.15905539691448212, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09105214476585388, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.0870676264166832, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10453005880117416, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09642495214939117, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09303394705057144, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08284606784582138, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07626061886548996, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05330970138311386, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04617984592914581, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.043732814490795135, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.0431382842361927, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.026636511087417603, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02258455380797386, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02237706258893013, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02090940997004509, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.020069941878318787, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013930359855294228, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01396482065320015, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01308413129299879, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.00926248263567686, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013930359855294228, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013930359855294228, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2456999570131302, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2300603985786438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.22413067519664764, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.20180970430374146, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11505863815546036, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.11029855161905289, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.13129092752933502, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.12086579948663712, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11747565120458603, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10473943501710892, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09644684195518494, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06675417721271515, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.057797469198703766, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05514698103070259, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05451858043670654, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03336753323674202, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.028232697397470474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.028009653091430664, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.026111934334039688, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.025089595466852188, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017286639660596848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.017018036916851997, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.016319066286087036, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010836437344551086, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017286639660596848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017286639660596848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2658386826515198, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.22968772053718567, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.21422652900218964, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.18739867210388184, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.1229621171951294, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.1067287027835846, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.15029630064964294, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.13670244812965393, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12783503532409668, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.10055951029062271, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09584618359804153, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.0772436335682869, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.0661146268248558, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05985488370060921, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05830620974302292, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.038979921489953995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.032121021300554276, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.03155366703867912, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.027838576585054398, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.027114253491163254, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.021276243031024933, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.0219187643378973, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.019220780581235886, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.016175666823983192, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.016175666823983192, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.016175666823983192, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.10835552960634232, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.0997185930609703, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.09383293986320496, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.08342709392309189, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.05016060173511505, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.046240031719207764, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.06529120355844498, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.057656899094581604, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.05167088657617569, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.04499927535653114, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.04264628514647484, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.03342291712760925, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02765817381441593, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.024282895028591156, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.02342332899570465, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016823837533593178, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.012774858623743057, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.012384594418108463, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.011757723055779934, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.010965535417199135, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.008920084685087204, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.008583901450037956, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.007559042889624834, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.005883726757019758, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016823837533593178, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016823837533593178, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.0919589102268219, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.08453307300806046, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.0783795416355133, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06978116929531097, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.042367275804281235, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.038185540586709976, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.05482036620378494, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.05036330595612526, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.04359830915927887, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.03809261694550514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.035612866282463074, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.027856791391968727, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.024250909686088562, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.020380627363920212, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.0193968266248703, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013907892629504204, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.01049881149083376, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.00999902281910181, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.009649118408560753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.008790917694568634, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.007166659459471703, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.0069610802456736565, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.005896824412047863, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.004252063110470772, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013907892629504204, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.013907892629504204, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.24144874513149261, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.22311249375343323, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.2153671681880951, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.1916557401418686, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.1121601089835167, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.10599581152200699, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.1353422999382019, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.12029977887868881, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.11512073874473572, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.10070016235113144, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.09360124170780182, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.06988874822854996, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.057580094784498215, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.05380512773990631, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.052929122000932693, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.035108473151922226, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.02720879577100277, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.026897676289081573, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.02480987086892128, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.02352542243897915, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.017855197191238403, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.016228286549448967, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.015702437609434128, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.009522533975541592, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.017855197191238403, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.017855197191238403, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.20650714635849, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.18665507435798645, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.17977628111839294, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.15521804988384247, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.09482021629810333, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.08916931599378586, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.11066525429487228, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.10067214071750641, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.09767214208841324, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.08228810131549835, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.07325928658246994, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.05656377598643303, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.048270490020513535, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.045619770884513855, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.04498137906193733, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.02828359417617321, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.023690206930041313, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.023507405072450638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.021176720038056374, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.020117389038205147, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.014946790412068367, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.014906751923263073, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.014064308255910873, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.010159808211028576, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.014946790412068367, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.014946790412068367, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.1826363205909729, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.17066752910614014, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.16586945950984955, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.14927330613136292, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.08542028069496155, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.0816306620836258, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.09780541062355042, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09031973779201508, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.08733763545751572, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.07767613977193832, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07150726020336151, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.04987999051809311, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04328594356775284, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04101983457803726, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.040480632334947586, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.024949626997113228, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.021232839673757553, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02104131132364273, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.01966947130858898, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.018887080252170563, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013110151514410973, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01318286545574665, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.012333245016634464, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.008826695382595062, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013110151514410973, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.013110151514410973, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.23575305938720703, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.22064422070980072, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.2150995135307312, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.19362887740135193, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11045368760824203, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.10591120272874832, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.125750333070755, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11582791060209274, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1127910166978836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10054312646389008, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09263308346271515, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06402270495891571, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.055428873747587204, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05300380662083626, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05240403115749359, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03200432285666466, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.027251489460468292, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02706112153828144, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.025236602872610092, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.024283116683363914, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016731515526771545, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.016597062349319458, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.015884434804320335, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010846097953617573, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016731515526771545, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016731515526771545, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2526156008243561, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.2168654501438141, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.20212945342063904, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.17690370976924896, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.1164567843079567, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10072964429855347, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.14258594810962677, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12918175756931305, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12118379771709442, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.09483376145362854, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09038927406072617, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07300974428653717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06251097470521927, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05671590939164162, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.055270057171583176, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.036860596388578415, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.030507436022162437, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.02999044768512249, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.026396289467811584, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.025726189836859703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.02009446918964386, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.020882323384284973, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01818051002919674, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.015494086779654026, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01818051002919674, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01818051002919674, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.10287334769964218, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.09480037540197372, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.0891851857304573, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.07939653098583221, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.047688256949186325, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.043954331427812576, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.060440368950366974, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.05473128706216812, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.049062538892030716, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.0427822582423687, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.040001995861530304, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.030657749623060226, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.026217542588710785, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.023057691752910614, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.022261841222643852, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01536988653242588, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.012088276445865631, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.011722280643880367, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.011113694868981838, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.010361538268625736, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.008126772940158844, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.008056229911744595, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.00705037172883749, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.005458120256662369, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01536988653242588, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01536988653242588, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.09453245252370834, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.0868435874581337, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.07913621515035629, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.07043753564357758, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.04310666024684906, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03845592960715294, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.05763213336467743, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.05337569862604141, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.04444042220711708, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.03887593001127243, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03649435192346573, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.02903776429593563, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.02523522824048996, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.020788945257663727, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.019565576687455177, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01449779886752367, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.010745828039944172, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.01012158952653408, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.009893878363072872, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.00887511670589447, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.007527099456638098, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.007284495513886213, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.006032176781445742, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.004429738037288189, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01449779886752367, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01449779886752367, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.257646769285202, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.23443584144115448, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.2241281121969223, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.19841350615024567, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.11847078800201416, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.11069971323013306, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.1467517912387848, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.13005167245864868, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.1226075068116188, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.10519519448280334, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.09829740971326828, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.07645463198423386, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.062124304473400116, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.056889817118644714, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05554128438234329, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03899448737502098, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.028963567689061165, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.02842593565583229, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.02615770325064659, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.024627884849905968, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.020062245428562164, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.017765821889042854, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.016759075224399567, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.010800382122397423, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.017765821889042854, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.017765821889042854, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.2674570679664612, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.24616876244544983, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.23816132545471191, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.20934471487998962, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.12419641762971878, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.11761936545372009, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.14483405649662018, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.1319396197795868, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.1275409758090973, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.1103593036532402, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.1001860573887825, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.07415809482336044, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.06334298849105835, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.0598079115152359, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.05900099128484726, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.0371304489672184, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.031045187264680862, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.030766304582357407, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.028217552229762077, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.02688690274953842, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.019550470635294914, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.019482629373669624, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.018391340970993042, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.013171457685530186, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.018391340970993042, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.018391340970993042, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.20193931460380554, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.1886550784111023, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.18358057737350464, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.1652774065732956, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09451817721128464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09052573144435883, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10756891220808029, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09940291196107864, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09659185260534286, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08597752451896667, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07900794595479965, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05487178638577461, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.047623928636312485, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04539214074611664, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.04486393928527832, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.027422089129686356, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02345466800034046, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.023272348567843437, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.021709738299250603, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02088790014386177, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014411281794309616, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01445647794753313, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01366474013775587, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009642625227570534, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014411281794309616, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014411281794309616, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2427755445241928, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2271476835012436, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.22151659429073334, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.19937390089035034, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11364296078681946, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.10906180739402771, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.12889474630355835, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11891090124845505, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11603440344333649, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10339333862066269, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09500468522310257, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06568826735019684, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05685841664671898, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.054473958909511566, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.05390556529164314, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03279629349708557, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02790847048163414, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02772563137114048, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02579675428569317, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.024834508076310158, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017080575227737427, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.01682833768427372, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01623593270778656, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010799326002597809, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017080575227737427, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.017080575227737427, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.26719895005226135, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.2336321771144867, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.2205095738172531, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.19318769872188568, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.12387660145759583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.1092633455991745, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.14805968105793, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.1349031776189804, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.12823468446731567, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.10224740952253342, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09676868468523026, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.07560619711875916, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06500295549631119, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.06001017987728119, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05878392606973648, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03799458593130112, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.03170071914792061, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.03126530721783638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.02747083082795143, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.026939021423459053, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.020481955260038376, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.020849352702498436, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01885034143924713, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.014850732870399952, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.014850732870399952, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.014850732870399952, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.11655479669570923, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.1067172959446907, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.10005857795476913, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.08945091813802719, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.05414677783846855, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.049697596579790115, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.06890136748552322, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.06234990060329437, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.05575655773282051, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.04845481738448143, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.04530069977045059, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.035206012427806854, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02999150939285755, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.02620181068778038, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.025226151570677757, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.017690978944301605, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.013746451586484909, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.013308592140674591, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.012605151161551476, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.011735965497791767, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.009314976632595062, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.00919970404356718, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.007999892346560955, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0062545002438127995, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.017690978944301605, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.017690978944301605, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.0944632962346077, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.0863899365067482, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.07761706411838531, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06930334120988846, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.04325743764638901, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.037777338176965714, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.06000420078635216, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.05485640838742256, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.0447283610701561, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.039070408791303635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03693010285496712, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.03036189079284668, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.026254918426275253, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.02089102752506733, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.019357968121767044, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01520294975489378, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.010751367546617985, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.009999023750424385, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.009882929734885693, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.008716129697859287, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.007813852280378342, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.007431125268340111, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.005991381127387285, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.00443867314606905, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01520294975489378, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01520294975489378, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.23922155797481537, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.21438363194465637, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.1989637315273285, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.1762741506099701, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.10952737182378769, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.09817896783351898, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.14545004069805145, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.12862208485603333, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.11422882974147797, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.0967601090669632, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.09301814436912537, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.07651805877685547, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.06216345354914665, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.052770618349313736, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05025201290845871, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.039042264223098755, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.02714712731540203, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.025999631732702255, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.02455533668398857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.0223067756742239, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.02019917406141758, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.017857857048511505, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.01539327297359705, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.010851861909031868, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.017857857048511505, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.017857857048511505, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.18861940503120422, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.16604261100292206, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.15938234329223633, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.13699805736541748, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.08653298765420914, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.08044371753931046, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.09962111711502075, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.0908968523144722, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.08936558663845062, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.07247138768434525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.06524728238582611, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.051051802933216095, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.04363543540239334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.04167444258928299, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.041208527982234955, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.025610899552702904, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.021761367097496986, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02166146971285343, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.018967390060424805, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.018061533570289612, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.013741817325353622, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.013757700100541115, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.013119636103510857, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.009623628109693527, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.018061533570289612, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.018061533570289612, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.20604068040847778, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.19252876937389374, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.18762919306755066, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.1686421036720276, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09650472551584244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09252294898033142, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10942137241363525, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10121779888868332, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09863170236349106, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08774061501026154, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08061869442462921, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05577550083398819, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.04849028214812279, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04635975509881973, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.0458536259829998, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.027866370975971222, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02395603060722351, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02380303293466568, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02217106521129608, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02134556695818901, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014585123397409916, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014768963679671288, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.013865423388779163, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009881766512989998, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014585123397409916, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.014585123397409916, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2399473935365677, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2244538962841034, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.21901071071624756, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.19715498387813568, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11234885454177856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.1078866571187973, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.12676526606082916, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11725227534770966, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11474763602018356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10216903686523438, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.09383323788642883, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.06446701288223267, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.056045882403850555, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05381309986114502, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.0532679408788681, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03218846768140793, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02747514843940735, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.027309870347380638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.025377893820405006, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.02442767471075058, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016640137881040573, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.016404137015342712, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01587318629026413, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.01032816618680954, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016640137881040573, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016640137881040573, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2546171545982361, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.22209659218788147, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.20920386910438538, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.1833164244890213, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.11774744838476181, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.10365954041481018, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.14071422815322876, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12873519957065582, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.121908999979496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.09717921912670135, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.09189856797456741, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.0720718652009964, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.06203669682145119, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05697467923164368, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.055697180330753326, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03626292198896408, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.030020730569958687, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.0295855849981308, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.02601107396185398, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.025451071560382843, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.019475867971777916, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.019723203033208847, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017786100506782532, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.013922353275120258, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017786100506782532, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.017786100506782532, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.11124002933502197, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.10199092328548431, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.09509371966123581, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.0850197896361351, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.051437459886074066, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.04698868840932846, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.0665818303823471, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.06034000590443611, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.05318630486726761, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.046297814697027206, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.04337248206138611, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.033809445798397064, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.02883305959403515, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.024884525686502457, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.02386108599603176, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01699492335319519, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.013089971616864204, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.012605763971805573, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.012080672197043896, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.011166684329509735, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.009043031372129917, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.008867415599524975, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.007661962881684303, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.005962160881608725, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01699492335319519, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.01699492335319519, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.09511987864971161, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.08716574311256409, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.07847234606742859, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.06999669224023819, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.04347958415746689, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.03822968527674675, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.059488311409950256, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.05513539910316467, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.044921405613422394, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.03929390385746956, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03700900822877884, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.030168315395712852, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.02630390040576458, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.021060701459646225, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.019625348970294, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.015043726190924644, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.01103578507900238, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.010334583930671215, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.010189012624323368, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.009097917005419731, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.008001643233001232, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.007774730212986469, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.006403092294931412, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.004999631084501743, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.015043726190924644, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.015043726190924644, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.2536112666130066, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.22972258925437927, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.21687087416648865, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.19212858378887177, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.11649534851312637, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.10708265751600266, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.15059565007686615, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.1319609433412552, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.12088989466428757, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.10324164479970932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.09781525284051895, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.07855373620986938, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.0632183626294136, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.05607860907912254, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05422336980700493, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03982405737042427, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.02869051694869995, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.02798416092991829, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.02594003640115261, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.024154480546712875, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.02042417973279953, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.018196139484643936, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.016359802335500717, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.011169448494911194, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.018196139484643936, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.018196139484643936, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.22078454494476318, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.2018737941980362, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.195095032453537, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.17005838453769684, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.10241402685642242, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.09685000777244568, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.11898934096097946, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.10824333876371384, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10524710267782211, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.0898488312959671, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.08138139545917511, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.061028074473142624, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.05192410200834274, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.04927372559905052, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.048643045127391815, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.030608000233769417, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.025559503585100174, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.02535102516412735, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.02302393689751625, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.021976882591843605, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01619342342019081, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.01597973145544529, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.015277339145541191, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.010848721489310265, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01619342342019081, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.01619342342019081, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.21317525207996368, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.19915688037872314, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.1941615343093872, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.17461974918842316, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09990294277667999, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.09580037742853165, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.11309703439474106, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10466962307691574, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.10209377110004425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.09076319634914398, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08332545310258865, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05762019008398056, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.050127819180488586, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04796493053436279, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.047428376972675323, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.028811505064368248, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.024670295417308807, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02451474405825138, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.022791381925344467, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.021935071796178818, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01504198182374239, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.015030491165816784, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.014311734586954117, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.00984308123588562, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01504198182374239, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01504198182374239, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.23644889891147614, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.2212282121181488, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.21589505672454834, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.19438733160495758, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.11086287349462509, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.10650663077831268, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.12496989220380783, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.1156633272767067, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.11317553371191025, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.10077164322137833, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.0923803299665451, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.0634440928697586, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05526787415146828, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.053095925599336624, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.052576083689928055, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.03169521316885948, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.02702878601849079, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02687189355492592, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.024941179901361465, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.023999322205781937, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016327152028679848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.016039082780480385, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.015587265603244305, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009958683513104916, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016327152028679848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.016327152028679848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.2409789264202118, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.21056880056858063, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.19823765754699707, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.17335030436515808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.11150369048118591, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.09815803170204163, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.13372954726219177, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.12217946350574493, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.11539112031459808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.0920930877327919, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.08704172074794769, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.06840888410806656, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.05888576805591583, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.05402100458741188, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.05282817780971527, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.03444943577051163, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.02847410924732685, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.028061630204319954, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.024674365296959877, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.024162814021110535, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.01863090693950653, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.018743444234132767, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01707860827445984, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.013275353237986565, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01707860827445984, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.01707860827445984, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.10806138813495636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.09988716244697571, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.09410612285137177, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.08433832973241806, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.0503302738070488, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.04646226391196251, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.0648786798119545, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.057732149958610535, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.05169706791639328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.04550211504101753, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.04265289381146431, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.03313068300485611, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.027674362063407898, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.024327632039785385, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.02347905933856964, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016640031710267067, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.012708893977105618, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.0123210484161973, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.011738506145775318, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.010959050618112087, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.008730190806090832, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.008415991440415382, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.007394076324999332, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.005638379603624344, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016640031710267067, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.016640031710267067, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.09938862174749374, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.09136397391557693, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.08361238241195679, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.07487663626670837, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.0457184761762619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.040848758071660995, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.06047487258911133, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.05605786666274071, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.047158192843198776, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.041331321001052856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.03870963305234909, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.03063185140490532, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.026692569255828857, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.02201395109295845, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.02076517790555954, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01525269728153944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.011375507339835167, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.010711602866649628, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.010492072440683842, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.009416592307388783, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.00794949196279049, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.007665130775421858, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.006384793668985367, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.004595444072037935, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01525269728153944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.01525269728153944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.26515263319015503, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.2443227469921112, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.23660428822040558, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.21036037802696228, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.12333326786756516, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.1168563961982727, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.14507097005844116, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.13130202889442444, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.12696008384227753, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.11048337817192078, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.10172034800052643, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.07437518984079361, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.06277415156364441, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.05896056070923805, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05817916989326477, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03721608966588974, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.02987844869494438, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.029544519260525703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.027124321088194847, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.025828544050455093, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018978390842676163, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.01768067479133606, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.017138328403234482, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.010339880362153053, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.01768067479133606, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.01768067479133606, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.22436340153217316, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.20353896915912628, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.19667012989521027, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.16911004483699799, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.10374949872493744, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.09774988889694214, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.11958780139684677, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.10942737758159637, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.10653515160083771, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.09031932801008224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.08008498698472977, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.06129561737179756, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.05250474065542221, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.049887318164110184, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.049266062676906586, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.030716922134160995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.025863874703645706, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.025687208399176598, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.0231926292181015, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.02201337367296219, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016290510073304176, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.016148874536156654, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.01542442012578249, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.01092488318681717, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016290510073304176, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.016290510073304176, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2165696769952774, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.20235225558280945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.19731289148330688, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.1776660531759262, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.10155218839645386, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.0973137691617012, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.11497136950492859, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.10637157410383224, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.10373358428478241, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.09221002459526062, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08475962281227112, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.058641329407691956, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.05097620561718941, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04874097928404808, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.048211291432380676, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.029319774359464645, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.025110172107815742, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.024941517040133476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.023202933371067047, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.022331688553094864, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015344283543527126, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.015364223159849644, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.014599544927477837, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.010125892236828804, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015344283543527126, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015344283543527126, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.2267385870218277, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.21218755841255188, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.20715586841106415, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.18646876513957977, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.10633242875337601, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.10216419398784637, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.1198166087269783, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.11090850085020065, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.1085347905755043, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.09663990885019302, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.08871474117040634, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.060911428183317184, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.053013987839221954, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.05094388127326965, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.050437841564416885, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.030414028093218803, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.0259830541908741, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.02583875134587288, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.023980269208550453, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.023080166429281235, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01572939194738865, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.015469500795006752, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.015015847980976105, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.009701823815703392, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01572939194738865, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.01572939194738865, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.21755234897136688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.190932035446167, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.18020890653133392, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.15758679807186127, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.10082969069480896, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.08914991468191147, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.12090960144996643, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.10996213555335999, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.10411453247070312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.08345960080623627, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.07906251400709152, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.06190558895468712, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.05307422950863838, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.04887976869940758, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.04785580560564995, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.0312453955411911, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.025922968983650208, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.025547675788402557, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.022544553503394127, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.02209124155342579, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.017027074471116066, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.017186099663376808, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.0156759824603796, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.012373584322631359, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.017027074471116066, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.017027074471116066, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.q_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.11640973389148712, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.10758989304304123, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.10113480687141418, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.09061843156814575, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.054201606661081314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.049854718148708344, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.06910084933042526, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.062493178993463516, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.05572878569364548, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.04899469017982483, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.045633818954229355, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.035222459584474564, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.030037924647331238, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.026159407570958138, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.025195740163326263, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.017673008143901825, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.013724684715270996, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.013260222971439362, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.012686079367995262, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.011789805255830288, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.009337055496871471, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.00916233565658331, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.007981853559613228, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.006162540055811405, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.017673008143901825, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.017673008143901825, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.k_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.1077682375907898, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.09926189482212067, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.09242388606071472, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.08264816552400589, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.049672458320856094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.045219480991363525, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.06362999975681305, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.05857723951339722, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.051205117255449295, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.044904254376888275, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.04186388477683067, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.03226058557629585, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.028057798743247986, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.023955659940838814, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.02286597155034542, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.016191070899367332, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.012376700527966022, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.011868081986904144, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.011387000791728497, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.010461408644914627, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.008475066162645817, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.008187970146536827, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.007170950062572956, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.005139200948178768, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.016191070899367332, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.016191070899367332, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.v_proj", "numel": 524288, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.193359375, "total_bits": 1149952.0, "err": 0.26336389780044556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.380859375, "total_bits": 1248256.0, "err": 0.24281629920005798, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.630859375, "total_bits": 1379328.0, "err": 0.2357195019721985, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.755859375, "total_bits": 1444864.0, "err": 0.20921479165554047, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.255859375, "total_bits": 1707008.0, "err": 0.1224743127822876, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.693359375, "total_bits": 1936384.0, "err": 0.11654330044984818, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03271484375, "total_bits": 1590016.0, "err": 0.1422700732946396, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.130859375, "total_bits": 1641472.0, "err": 0.128847137093544, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.193359375, "total_bits": 1674240.0, "err": 0.12587769329547882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.505859375, "total_bits": 1838080.0, "err": 0.10952015221118927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6904296875, "total_bits": 1934848.0, "err": 0.10004620999097824, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03271484375, "total_bits": 2114304.0, "err": 0.07266207784414291, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.130859375, "total_bits": 2165760.0, "err": 0.0616445355117321, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.255859375, "total_bits": 2231296.0, "err": 0.05864716321229935, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.380859375, "total_bits": 2296832.0, "err": 0.05794135108590126, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03271484375, "total_bits": 2638592.0, "err": 0.03629406541585922, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.255859375, "total_bits": 2755584.0, "err": 0.029593775048851967, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.380859375, "total_bits": 2821120.0, "err": 0.02939631976187229, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.505859375, "total_bits": 2886656.0, "err": 0.026895904913544655, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.818359375, "total_bits": 3050496.0, "err": 0.025668051093816757, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018468711525201797, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.130859375, "total_bits": 3214336.0, "err": 0.01732046529650688, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28271484375, "total_bits": 3293952.0, "err": 0.01704171858727932, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.130859375, "total_bits": 4262912.0, "err": 0.010049533098936081, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018468711525201797, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.03271484375, "total_bits": 3162880.0, "err": 0.018468711525201797, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.o_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.188232421875, "total_bits": 9178112.0, "err": 0.14023897051811218, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 9964544.0, "err": 0.12651938199996948, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 11013120.0, "err": 0.12226581573486328, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750732421875, "total_bits": 11537408.0, "err": 0.10298319905996323, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250732421875, "total_bits": 13634560.0, "err": 0.06434453278779984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.688232421875, "total_bits": 15469568.0, "err": 0.0607893131673336, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 12714752.0, "err": 0.07419272512197495, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 13110272.0, "err": 0.06755433976650238, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.188232421875, "total_bits": 13372416.0, "err": 0.06616700440645218, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500732421875, "total_bits": 14683136.0, "err": 0.05504888296127319, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6878662109375, "total_bits": 15468032.0, "err": 0.04845879599452019, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 16909056.0, "err": 0.03794630616903305, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 17304576.0, "err": 0.032515499740839005, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.250732421875, "total_bits": 17828864.0, "err": 0.031078392639756203, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.375732421875, "total_bits": 18353152.0, "err": 0.030732642859220505, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 21103360.0, "err": 0.019023161381483078, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.016439665108919144, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.375732421875, "total_bits": 22547456.0, "err": 0.016362909227609634, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.500732421875, "total_bits": 23071744.0, "err": 0.014640162698924541, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.813232421875, "total_bits": 24382464.0, "err": 0.013936098664999008, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 25297664.0, "err": 0.010236923582851887, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 25693184.0, "err": 0.010672894306480885, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.28143310546875, "total_bits": 26346240.0, "err": 0.009746757335960865, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 34081792.0, "err": 0.0077998754568398, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.016439665108919144, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.250732421875, "total_bits": 22023168.0, "err": 0.016439665108919144, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.gate_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.19421808421611786, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.18134595453739166, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.1767439842224121, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.15922586619853973, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.09157511591911316, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.08776701986789703, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.10399052500724792, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.09601280838251114, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.09347940981388092, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.08328733593225479, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.07671903818845749, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.05375383794307709, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.046805668622255325, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.04477023333311081, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.044286955147981644, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.027084670960903168, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.024326324462890625, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.0241770688444376, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.02273281291127205, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.022003933787345886, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015085160732269287, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.016564197838306427, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.01443440280854702, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.012898731976747513, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015085160732269287, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.015085160732269287, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.up_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.187766335227273, "total_bits": 25234432.000000004, "err": 0.1604851931333542, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375266335227273, "total_bits": 27397120.000000004, "err": 0.14996635913848877, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625266335227273, "total_bits": 30280704.000000004, "err": 0.14621257781982422, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.750266335227273, "total_bits": 31722496.000000004, "err": 0.13162951171398163, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.250266335227273, "total_bits": 37489664.0, "err": 0.07583200186491013, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.687766335227273, "total_bits": 42535936.0, "err": 0.07277326285839081, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313165838068183, "total_bits": 34964224.0, "err": 0.08604194968938828, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125266335227273, "total_bits": 36047872.0, "err": 0.0794556736946106, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.187766335227273, "total_bits": 36768768.0, "err": 0.07750936597585678, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.500266335227273, "total_bits": 40373248.0, "err": 0.06896455585956573, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6876331676136362, "total_bits": 42534400.0, "err": 0.06355980038642883, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031316583806818, "total_bits": 46498560.0, "err": 0.044492531567811966, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1252663352272725, "total_bits": 47582208.0, "err": 0.03884929418563843, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2502663352272725, "total_bits": 49024000.0, "err": 0.03726378083229065, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.3752663352272725, "total_bits": 50465792.0, "err": 0.03688808158040047, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031316583806818, "total_bits": 58032896.0, "err": 0.02251671999692917, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2502663352272725, "total_bits": 60558336.0, "err": 0.020532911643385887, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.3752663352272725, "total_bits": 62000128.0, "err": 0.020423313602805138, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.5002663352272725, "total_bits": 63441920.0, "err": 0.01925167255103588, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.8127663352272725, "total_bits": 67046400.0, "err": 0.018673434853553772, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.012773342430591583, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1252663352272725, "total_bits": 70650880.0, "err": 0.014308414421975613, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.281316583806818, "total_bits": 72450816.0, "err": 0.012276780791580677, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125266335227273, "total_bits": 93719552.0, "err": 0.011466095224022865, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.012773342430591583, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b 128g s4", "bpw": 6.031316583806818, "total_bits": 69567232.0, "err": 0.012773342430591583, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.down_proj", "numel": 11534336, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1711869673295454, "total_bits": 25043200.0, "err": 0.16065683960914612, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375732421875, "total_bits": 27402496.0, "err": 0.14186038076877594, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625732421875, "total_bits": 30286080.0, "err": 0.13353832066059113, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.716641512784091, "total_bits": 31334656.0, "err": 0.11784882843494415, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.216641512784091, "total_bits": 37101824.0, "err": 0.07425517588853836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7393687855113638, "total_bits": 43131136.0, "err": 0.06601870059967041, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.03143310546875, "total_bits": 34965568.0, "err": 0.09292453527450562, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125732421875, "total_bits": 36053248.0, "err": 0.08240751177072525, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1711869673295454, "total_bits": 36577536.0, "err": 0.07671621441841125, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.534823330965909, "total_bits": 40771840.0, "err": 0.06294022500514984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.653775301846591, "total_bits": 42143872.0, "err": 0.06001663580536842, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.03143310546875, "total_bits": 46499904.0, "err": 0.04730774462223053, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125732421875, "total_bits": 47587584.0, "err": 0.04008999094367027, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.216641512784091, "total_bits": 48636160.0, "err": 0.03635454550385475, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.307550603693182, "total_bits": 49684736.0, "err": 0.03544587641954422, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.03143310546875, "total_bits": 58034240.0, "err": 0.02400543913245201, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.216641512784091, "total_bits": 60170496.0, "err": 0.019826238974928856, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.307550603693182, "total_bits": 61219072.0, "err": 0.019476979970932007, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.017699966207146645, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.693914240056818, "total_bits": 65675520.0, "err": 0.01729292795062065, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.03143310546875, "total_bits": 69568576.0, "err": 0.013244953006505966, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125732421875, "total_bits": 70656256.0, "err": 0.013873704709112644, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.213251287286932, "total_bits": 71665728.0, "err": 0.011959406547248363, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125732421875, "total_bits": 93724928.0, "err": 0.010602963156998158, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.017699966207146645, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.534823330965909, "total_bits": 63840512.0, "err": 0.017699966207146645, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } } ], "last_module_idx": 46, "base_perplexity": 9.118522345730767 }