namespace vllm { | |
enum class Fp8KVCacheDataType { | |
kAuto = 0, | |
kFp8E4M3 = 1, | |
kFp8E5M2 = 2, | |
}; | |
// fp8 vector types for quantization of kv cache | |
template <> | |
struct Vec<uint8_t, 1> { | |
using Type = uint8_t; | |
}; | |
template <> | |
struct Vec<uint8_t, 2> { | |
using Type = uint16_t; | |
}; | |
template <> | |
struct Vec<uint8_t, 4> { | |
using Type = uint32_t; | |
}; | |
template <> | |
struct Vec<uint8_t, 8> { | |
using Type = uint2; | |
}; | |
} // namespace vllm | |