"""Torch extension entry point for the quantization package.

Re-exports the quantization kernels (and the raw ``ops`` namespace,
handy for tests etc.) at package level.
"""
from .compressed_tensors import scaled_fp8_quant, scaled_int8_quant
from .cutlass import (
cutlass_scaled_mm_supports_fp8,
cutlass_scaled_mm,
cutlass_scaled_mm_azp,
)
from .marlin import (
awq_marlin_repack,
fp8_marlin_gemm,
gptq_marlin_gemm,
gptq_marlin_repack,
gptq_marlin_24_gemm,
marlin_qqq_gemm,
marlin_gemm,
)
from ._ops import ops
__all__ = [
"awq_marlin_repack",
"cutlass_scaled_mm",
"cutlass_scaled_mm_azp",
"cutlass_scaled_mm_supports_fp8",
"fp8_marlin_gemm",
"gptq_marlin_24_gemm",
"gptq_marlin_gemm",
"gptq_marlin_repack",
"marlin_gemm",
"marlin_qqq_gemm",
"ops",
"scaled_fp8_quant",
"scaled_int8_quant",
]