|
#extension GL_EXT_shader_16bit_storage: require |
|
#extension GL_EXT_shader_8bit_storage: require |
|
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require |
|
#extension GL_EXT_shader_explicit_arithmetic_types_int8: require |
|
#extension GL_EXT_shader_explicit_arithmetic_types_int16: require |
|
#extension GL_EXT_shader_explicit_arithmetic_types_int64: require |
|
#extension GL_EXT_control_flow_attributes: enable |
|
#extension GL_KHR_shader_subgroup_arithmetic : require |
|
#extension GL_EXT_debug_printf : enable |
|
|
|
// Block-size constant for the Q4_0 layout; block_q4_0.qs is sized QK4_0 / 2 bytes.
#define QK4_0 32
// Block-size constant for the Q4_1 layout; block_q4_1.qs is sized QK4_1 / 2 bytes.
#define QK4_1 32
|
|
|
// Numeric constants. None of them is referenced in this part of the file.
// NOTE(review): GELU_COEF_A is presumably the cubic coefficient of the
// tanh-based GELU approximation - confirm against the shader that uses it.
#define GELU_COEF_A 0.044715
// sqrt(2 / pi)
#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
// 2 * pi
#define TWOPI_F 6.283185307179586f
|
|
|
// Block-size constant for the k-quant layouts; sizes the ql/qh/qs/scales
// arrays of block_q4_k and block_q6_k (QK_K/2, QK_K/4, QK_K/16).
#define QK_K 256
// Length in bytes of block_q4_k.scales.
#define K_SCALE_SIZE 12
|
|
|
// Little-endian loads from an array of bytes.
//
//   buf - name of an indexable array of 8-bit values
//   idx - index of the least-significant byte
//
// `idx` is parenthesized inside every expansion so that an argument such as
// `i & 1` or `a + b` keeps its meaning once `+ 1` / `+ 2` is appended to it.
// It is still expanded more than once, so do not pass an expression with
// side effects.

// Two bytes at idx, idx + 1 -> uint16_t.
#define u8BufToU16(buf, idx) ((uint16_t(buf[(idx) + 1]) << 8) | uint16_t(buf[(idx)]))
// Same two bytes reinterpreted as a float16_t bit pattern.
#define u8BufToFloat16(buf, idx) uint16BitsToHalf(u8BufToU16(buf, idx))
// Four bytes at idx .. idx + 3 -> uint32_t.
#define u8BufToU32(buf, idx) ((uint32_t(u8BufToU16(buf, (idx) + 2)) << 16) | uint32_t(u8BufToU16(buf, idx)))
// Same four bytes reinterpreted as a 32-bit float bit pattern.
#define u8BufToFloat(buf, idx) uintBitsToFloat(u8BufToU32(buf, idx))
|
|
|
// Size of one block_q4_0 in bytes, i.e. the sum of its field sizes:
// 2 (float16_t d) + QK4_0 / 2 (qs).
#define sizeof_block_q4_0 18

// One Q4_0 block: a scale shared by the whole block plus packed 4-bit
// values, two per byte. Decoded by dequantize_q4_0.
struct block_q4_0 {
    float16_t d;              // scale applied to every decoded value
    uint8_t   qs[QK4_0 / 2];  // packed nibbles
};
|
// Decode 16 of the 4-bit values held in a Q4_0 block.
//
//   xb - block to decode (scale `d`, packed nibbles `qs`)
//   il - nibble selector: 0 takes the low nibble of each of the 16 bytes of
//        xb.qs, any non-zero value takes the high nibble
//
// Returns a mat4 whose k-th element in storage order (result[k / 4][k % 4])
// is xb.d * nibble(xb.qs[k]) - 8 * xb.d, up to floating-point rounding.
mat4 dequantize_q4_0(const block_q4_0 xb, uint il) {
    // Bytes are consumed in pairs as one 16-bit word. The selected nibble is
    // masked in place rather than shifted down, so the scales absorb the bit
    // position: /16 for a high nibble, and a further /256 for the upper byte.
    const float    scale_lo = il != 0 ? (xb.d / 16.f) : xb.d;
    const float    scale_hi = scale_lo / 256.f;
    const float    offset   = -8.f * xb.d;
    const uint16_t nib_lo   = il != 0 ? uint16_t(0x00F0) : uint16_t(0x000F);
    const uint16_t nib_hi   = nib_lo << 8;

    mat4 result;
    for (int pair = 0; pair < 8; ++pair) {
        const uint16_t word = (uint16_t(xb.qs[2 * pair + 1]) << 8) | uint16_t(xb.qs[2 * pair]);
        const int outer = pair / 2;
        const int inner = 2 * (pair % 2);
        result[outer][inner]     = scale_lo * (word & nib_lo) + offset;
        result[outer][inner + 1] = scale_hi * (word & nib_hi) + offset;
    }
    return result;
}
|
|
|
// Size of one block_q4_1 in bytes, i.e. the sum of its field sizes:
// 2 (float16_t d) + 2 (float16_t m) + QK4_1 / 2 (qs).
#define sizeof_block_q4_1 20

// One Q4_1 block: a scale and an additive term shared by the whole block,
// plus packed 4-bit values, two per byte. Decoded by dequantize_q4_1.
struct block_q4_1 {
    float16_t d;              // scale applied to every decoded value
    float16_t m;              // value added after scaling
    uint8_t   qs[QK4_1 / 2];  // packed nibbles
};
|
// Decode 16 of the 4-bit values held in a Q4_1 block.
//
//   xb - block to decode (scale `d`, additive term `m`, packed nibbles `qs`)
//   il - nibble selector: 0 takes the low nibble of each of the 16 bytes of
//        xb.qs, any non-zero value takes the high nibble
//
// Returns a mat4 whose k-th element in storage order (result[k / 4][k % 4])
// is xb.d * nibble(xb.qs[k]) + xb.m, up to floating-point rounding.
mat4 dequantize_q4_1(const block_q4_1 xb, uint il) {
    // The selected nibble is masked in place inside a 16-bit word, so the
    // scales carry the bit position: /16 for a high nibble, and a further
    // /256 for the upper byte of the word.
    const float    scale_lo = il != 0 ? (xb.d / 16.f) : xb.d;
    const float    scale_hi = scale_lo / 256.f;
    const float    base     = xb.m;
    const uint16_t nib_lo   = il != 0 ? uint16_t(0x00F0) : uint16_t(0x000F);
    const uint16_t nib_hi   = nib_lo << 8;

    mat4 result;
    for (int pair = 0; pair < 8; ++pair) {
        const uint16_t word = (uint16_t(xb.qs[2 * pair + 1]) << 8) | uint16_t(xb.qs[2 * pair]);
        const int outer = pair / 2;
        const int inner = 2 * (pair % 2);
        result[outer][inner]     = ((word & nib_lo) * scale_lo) + base;
        result[outer][inner + 1] = ((word & nib_hi) * scale_hi) + base;
    }
    return result;
}
|
|
|
// Size of one block_q4_k in bytes, i.e. the sum of its field sizes:
// 2 (d) + 2 (dmin) + K_SCALE_SIZE (scales) + QK_K / 2 (qs).
#define sizeof_block_q4_k 144

// Storage layout of one Q4_K block.
// NOTE(review): no decoder for this layout is visible in this part of the
// file; the per-field remarks below are read off the names and sizes only -
// confirm against the shader that consumes it.
struct block_q4_k {
    float16_t d;                     // presumably the block-level scale
    float16_t dmin;                  // presumably the block-level minimum scale
    uint8_t   scales[K_SCALE_SIZE];  // 12 bytes of packed scale data
    uint8_t   qs[QK_K/2];            // 128 bytes of packed quantized values
};
|
|
|
// Size of one block_q6_k in bytes, i.e. the sum of its field sizes:
// QK_K / 2 (ql) + QK_K / 4 (qh) + QK_K / 16 (scales) + 2 (d).
#define sizeof_block_q6_k 210

// One Q6_K block. dequantize_q6_k rebuilds each 6-bit value from four bits
// taken from `ql` and two bits taken from `qh`, then scales it by
// d * scales[group].
struct block_q6_k {
    uint8_t   ql[QK_K/2];       // low four bits of the values, two per byte
    uint8_t   qh[QK_K/4];       // high two bits of the values, four per byte
    int8_t    scales[QK_K/16];  // signed scale per group of 16 values
    float16_t d;                // scale shared by the whole block
};
|
// Decode one group of 16 six-bit values from a Q6_K block.
//
//   xb - block to decode
//   il - group selector; it is used directly as an index into the 16-entry
//        xb.scales array, so it is expected to be in 0..15
//
// Returns a mat4 whose element reg[i / 4][i % 4] is
//   xb.d * xb.scales[il] * (q - 32)
// where q is the 6-bit value assembled from xb.ql and xb.qh for position i.
//
// All arithmetic is done in 32-bit float. Subtracting `ml` from `dl * q`
// cancels most of the magnitude when q is close to 32, so rounding both
// terms to float16_t first (as an all-half evaluation would) leaves only a
// few significant bits in the result. The other dequantize_* helpers in this
// file already evaluate in float.
mat4 dequantize_q6_k(const block_q6_k xb, uint il) {
    const float d_all = float(xb.d);

    const uint qlIndex = 64*(il/8) + 32*((il/2)&1) + 16*(il&1);
    const uint qhIndex = 32*(il/8) + 16*(il&1);
    // (il % 2) + 2 * (il / 2) equals il for any unsigned il.
    const float sc = float(xb.scales[il]);
    il = (il/2) & 3;

    // Which two bits of each qh byte belong to this group: 3, 12, 48 or 192.
    const uint kmask1 = 3u << (2u * il);
    // Groups 0-1 use the low nibble of ql, groups 2-3 the high nibble.
    const uint kmask2 = il > 1 ? 0xF0u : 0x0Fu;
    // Shift that places the qh bits directly above the selected ql nibble.
    const uint qhShift = (il & 1) != 0 ? 2u : 4u;
    // A high-nibble value is left in bits 4..9, i.e. 16x too large; fold the
    // correction into the scale instead of shifting every value.
    const float coef = il > 1 ? 1.f/16.f : 1.f;
    const float ml = d_all * sc * 32.f;
    const float dl = d_all * sc * coef;

    mat4 reg;
    for (int i = 0; i < 16; ++i) {
        const uint lowBits  = uint(xb.ql[qlIndex + i]) & kmask2;
        const uint highBits = (uint(xb.qh[qhIndex + i]) & kmask1) << qhShift;
        reg[i/4][i%4] = dl * float(lowBits | highBits) - ml;
    }
    return reg;
}
|
|
|
|
|
// Block-size constant for the Q8_0 layout (its struct is not visible in this
// part of the file).
#define QK8_0 32
|
|
|
|
|
|
|
|
|
// Size in bytes of one Q8_0 block.
// NOTE(review): 34 would match a 2-byte scale plus QK8_0 one-byte values,
// but the struct is not visible here - confirm.
#define sizeof_block_q8_0 34
|
|