diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a42e8dd605460bc539ad6d15e6ddda7c0c4d96b Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..820bb689b95a3410a7fd872173d206131ae1caae Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e5238d2eeceb8929e9aec9cd1fbdb784f5bbfc7 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..128c3a6164773b1711739657f2973ed8f8b1ca05 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c725ed99b1cfb2cf07d906fcd0789ffbc5252b1 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..170bb642b275da4e0430a657d4c86900b0c1fa96 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0daa4d427f8b9c46dc84b588d958fb726c638a03 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/_ops.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/_ops.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . 
import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..d8f0c9fe1bb218361ab1b7f374bcd51410c28a1c --- /dev/null +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8234b0e279e6da2eb6bfe5a6a88635365f861d8a8cbe2a1c3340f507c37ca487 +size 155756104 diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index 8c3dba3b1559e54f481b79be05ad0ffa6948fffa..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05b3dbcc1c3200458ec526bc95169a8b286704dbbfe93b1b5bb580d490be4f3d -size 155751904 diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c9c92719c2e713ec02a5b8d039f0b6b3579cc512 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fa58b1ca3855a04191fa6b2cae88ab10cb51698 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..434a47496ad4047de37fbca9c186eefcfece2bea Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..565db147ac8d3e49f5a40aa8cfe6dcd443dd6a13 Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4df3f54a1088ce44a5ea43170618ae6ad888020 Binary 
files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py index 
b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d024e5981c3ff7348e3f8fc2e0c35db532b4f54e 
Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95aff7513c0dee57b8ffb5e47fb77bd89ba8727c Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b51fb20112921bbc667a044ad6ad86df75852fd Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa35c0b26de48238035ae759c3f2db0d073a0b8a Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca67cafd04e6b4bf1eecff15a56a7271a2262f8e Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..094f327508a2313ed0ce5daf94dabb2b5fb04377 Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f06538651f5769641b9c12ef79352a1c118109bf Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/_ops.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/quantization/_ops.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..e69eaffa7f7a1dabc26edc3a1778464ca034b7c1 --- /dev/null +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70790ec67aaa48046db424362f76724cf70ecc91bf479c88da71ea6592bb637f +size 159578136 diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index 2ecd0069776902c1cd7367a5f0ed49cd63179c83..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu124-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5dc49e9b5709f18d3e12ab2d76e37743c31cb2602d219e80173a9c5c0ba1acd -size 159574040 diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8289105078006e2105cf4269cf1e96b0f7112fd6 Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6522046106cb08c3d86e9630314f4b536504aad6 Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2d8f82755bec5e6fd90e30dd8e5711c9f5c56ae Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1bf521de490c12f6cd62eb85d5355ff35df80325 Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45a2b107bb32d5dd8c26ac1bdcb2e7b4c35f2577 Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- 
a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfd7187e0166ffb2e07b1ac50541cbe2a3418e64 Binary files /dev/null and 
b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03b3b823b23dfa8cf9416c7b9aca2f496a86d096 Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70d815114d232d292e5db15be8e2b31746d8baf9 Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56fce181a30457a5ae141c06b9f7ffd0e5c68e0d Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe221766e23ecf99f776ef7ff4d9466d4af7c781 Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0fcb850188467950338e920e5571ddef399675a Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1f5f1971d51a0fe22de058ad4f120aafdac2f7b Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/_ops.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/quantization/_ops.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..1dca183d71e20ddf8ef500cabdc66432f621ecff --- /dev/null +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33c0b9d3ba9e713cdb661ee669e95379212181902123b84161cf84dee599bca +size 160276536 diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index 9ecd831fd61c366a7686b0ece23c62e0e75f07e9..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af7fad3054f0981d175aa7dcabf9dbe3c556ba0dcee7f20a2c104abd17dce7a5 -size 160280624 diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15295daf06e5a0dc3e8b205b37bd8eda3244be8a Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2247c0761f34542a54050777074169cb619e5b4 Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75031871c4e6417d0677ffa172d095587a8ad4d5 Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7f4a700242a8aeb956e452f10ef0b527ed016c6 Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3524a3c8a47117dbc0dff0502a2d0ca7dd62f21c Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- 
a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..260605869dd6249669412a610e9861644c60cf68 Binary files /dev/null and 
b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e79ae5f9afaf3d670e1ed8ced06be74083b15b2 Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b94c8a733d41726001c61dfdf34edfd0792e163e Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6a30cb618d5ea207c9c10a4b0c6f22d5c3e9117 Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f33eb2005c619943594d472cb1891129bba54a3f Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd95998f558204bd28343c541d8d41a546b65990 Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08950cee901d4a6746090af62f8b4e578ee43167 Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/_ops.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/quantization/_ops.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..f1a8a98aa222ea4c5cfe590fda1466e65e11f305 --- /dev/null +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17fd2aa139eac1e908024c0a77e62b70a59d5291fd1f74c71001c2f2b44a8073 +size 155735984 diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index b528c0fd91cdf375f6323735fec6890d976fb805..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9736b3b73f06d4fd9881fd417dbe72aa7e5e4cbc2845ca247b9427b2b7f2b858 -size 155739832 diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73598c760956c188fa5cacc82ff94de6ff36fe1e Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..710ded60ac9f6aad7009fc657302735ca9379c67 Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19f83b974c638991d368c12a751aa2c91a63eb29 Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53115f81586ac84f4eceb1b60ee55eba72bd6bec Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2288c7b88b9d0b3a374e3840ecf5b71430d94a72 Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- 
a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4941f6fe9f97cb6eb6249fa71b3b6384a0479d0a Binary files /dev/null and 
b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7871a5bcfbadf308c043ca32fa3ed2f47872a643 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95db348b3880eb2b92020484290f1a1ddbc6a000 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87c77b011984fccd026e552a52bfd8fbb8e416ee Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..76f76ae9c2bc83d5346d28fd9b83d830607dd893 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e92c1a5193e59f54885a0e24ffd9f16152e07a1 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd2d4724d685ff583c3cb25a8f9f40eb31f2b429 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/_ops.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/quantization/_ops.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..c1f6aa674798331683fb7370ace68e34c9d2cd6e --- /dev/null +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10eb83edf122932dd9b93ea12b0924be04f46f0b085eb72954e413b7679d0149 +size 159570240 diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index 397995f1fad32b4374594e94d290edac89b37918..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3fa6583683394285f5d1c65f808a967b2db197831a097c638400b06a544187ba -size 159570240 diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e8b57e0afd753d2881ce59bd19418302fcad454 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7f01af529100a5f69fdc9f3ae5ee8fe1a58e814 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e773b3db7f6799dc2b9a4a950287c0a506bb3ffa Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d63930666171e69e08efa977d56b48bd25815563 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60a2fec91b02d7a84731a2fe19c732533231ede1 Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- 
a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a1152d5486ba6ce61ac7711f8231a0e9e0b3346 Binary files /dev/null and 
b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86822ab46c0f1f571d8b941703ad28860a300a7b Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ec95b6197e67350376c7d5b7ab58587ae7885b7b Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30e7460413050b4d0145bdcd9b0e919e3708e02d Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7def9f9eed610818e6163176a7a756a9c7b21927 Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c1d6d6c9d653e13bc63c1ea5294e1bdf3fd2fdc Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f44303070a24e1ee1d0fe4bca113797e9f722cfe Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/_ops.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/quantization/_ops.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..29ec02fb44f91de67eadda399323ca7fcd80be66 --- /dev/null +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e26bb3368be70f1a1928c1b713ed19078362c968ff6ebc006ad08a415a75e4f +size 160274384 diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index f5aecaf120c55fc203d933de6e9ba5d0b60531b9..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:027e39213c07a0d90a7cbd3ea7f7e7415d9a4d561e2d774ab6212512e0452007 -size 160278472 diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..565d44a3016300a613bb54042030a3e2c4fe75f8 Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36c82f876041e1f4fe45e37b5b62378c4042c399 Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9c857479c6438592448d627ff091ef1041f7e93 Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dba0e07a3593d80314b04b32963ce8120660dd3 Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9330d87747baf83a3d93ad84ded249abefdddf4c Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- 
a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5a1e4e948ce838cdb49f2b251cd80371a8365bd Binary files /dev/null and 
b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..522326a9f513b1aa612d88bb5f9926c0f85864a0 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..177a7f93bb102e42bd3dff2156ca35ab00e0a263 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..357eef6988c0a9b23adaf65313c21796d9fe2a49 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..649d12aefa9665cf44351a3082eef867c0249e66 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b067b9b0dcde1cfdfb92863a34c2c8af1ad568f6 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56aad01432f24cb50f286ef11aaca5d7c4864ffe Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/quantization/_ops.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..91420d57eabfaf52e8a756cf6b531ded888dfae3 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5bba0c8cdcb83d74211c96ec3fc0643d63dc56a3a3c0f786affac357b1d969 +size 155756776 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index 4bc676d907393c1ac4a02d4e479da7b1faf02c43..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18dc876a3fd8d78af10311486db850cfa1905b6d5cc308a72f44bc0704bc91e6 -size 155752576 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..674bca51e755b945b66057d975b5b0c0785a7237 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cce9242f9a22cf1afcb74dd01bf207b69756bf62 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf9dc2b11a3a5d5e90fa07b54b79ce47232e2617 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1048c05ea08795aa2f3f1389544459f08658675 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9471afa154ef0b9a7439c69b9fd1a0dd196339b3 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- 
a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..baee239f335bc94ffbdb0a821e8ee02c8e024fa1 Binary files /dev/null and 
b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b36b2007adc14c0f583218563a8d384f36edb8f Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5719167058442774ba46cf4edad70abb7a99df92 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..001e0b8c52650cbc88208551fb27325efcdc075a Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3a7a72b0cfc4fc39c6d89d9a3ffb352473ff973 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fce4ef4818f62332b7cf69f6c31fbdf3f3c4b124 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e44c14acabb882dc52e117857795e621a730a71 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/quantization/_ops.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..37f58ff8752bd9d34ddf7083f6df13b555ca093c --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0e54281acaf08e57a0bc219fb45b1144ed3c144fe07c3f58eeaba0ce111c45 +size 160280656 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index def3c910615258d745f3f82bb8ac74f33b71d72c..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4e49acf1fe6df71b16edbf8cafc8ba41dbbda45e569b20b867bd8404a8f34db9 -size 160284752 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a430d448463ccbffee96a6d9246ac56094cfde50 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42fc18300f93ea9a53cf04be41223043769c4edb Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..671c779c6775c9cbdef5a4eb4b92fcb668d169d6 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cca3f73fce350ce34368d9a09beb428ed9cf72b Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60d2c1dbd29268f4b5726fe07cc654fca6db4acf Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- 
a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd7981c9a06e0604c74deb10182fb32ad537b648 Binary files /dev/null and 
b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24e5ea4e89ccdc36960e2062d9713a5ac75d01bd Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d48872e3c1c7974edeec89cdb627e9e15251fc0 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3bb55404ca97c02589a7d6324a95a07aa4acf132 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0f61d5f0dee9c9a72e2fca19119bc15026c027b Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bddbf7991aa8083a9658188adb4598fa8b2fb5d3 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21ba889a452b1f49cf85713df874f6c0b86d11e5 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization/_ops.py index 046a27db01752cf820b0418cc0cd195afc362c86..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/_ops.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_dfa7d18 -ops = torch.ops._quantization_dfa7d18 +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_quantization_dfa7d18::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/_quantization_3313895.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..ae4f50295ac20970cbec84d7b729192995e9ae6e --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d62d8c55a741d331a6c389f0f216d8f754b5d830a7611c42e6e10893880ca1 +size 297102992 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so deleted file mode 100755 index ad249d9bf305329d7052b7fe96ffc4f6f79a010a..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/_quantization_dfa7d18.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c5b228ee9c669189c71da56a54be02d116cb733e17139b02344423fb768a4db -size 297102992 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e9c5c5343b09b92845d3f5dcb4d9d6d3586f44a Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be37b8988bbe50b18bd3d4dcf60e82abe37c57f7 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3a7659441c5e4cdf317bcc09606768565faa387 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..716d1e73f1288ea9e3dd12f39dafd3a8440bc38f Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..959008db871cceb5aac59f80aa8a3cbb8f22766a Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git 
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e9c5c5343b09b92845d3f5dcb4d9d6d3586f44a
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be37b8988bbe50b18bd3d4dcf60e82abe37c57f7
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b3a7659441c5e4cdf317bcc09606768565faa387
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..716d1e73f1288ea9e3dd12f39dafd3a8440bc38f
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..959008db871cceb5aac59f80aa8a3cbb8f22766a
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils.py
index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils.py
@@ -6,8 +6,7 @@ from typing import Optional
 import numpy
 import torch
-import quantization as ops
-from quantization.scalar_type import ScalarType, scalar_types
+from .. import ScalarType, gptq_marlin_gemm, scalar_types
 from .quant_utils import pack_cols, unpack_cols
@@ -383,7 +382,7 @@ def apply_gptq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                               None,
                               weight,
                               weight_scale,
@@ -429,7 +428,7 @@ def apply_awq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                               None,
                               weight,
                               weight_scale,
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp4.py
index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp4.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp4.py
@@ -5,12 +5,11 @@ from typing import Optional
 import torch
-import quantization as ops
-
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import (
     USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales,
     should_use_atomic_add_reduce)
-from quantization.scalar_type import scalar_types
+from ..scalar_type import scalar_types
 FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16]
@@ -90,7 +89,7 @@ def apply_fp4_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                               c=None,
                               b_q_weight=weight,
                               b_scales=weight_scale,
@@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     perm = torch.empty(0, dtype=torch.int, device=device)
     qweight = layer.weight.view(torch.int32).T.contiguous()
-    marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+    marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                         perm=perm,
                                         size_k=part_size_k,
                                         size_n=part_size_n,
@@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     for i in range(e):
         qweight = weight[i].view(torch.int32).T.contiguous()
-        marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+        marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                             perm=perm,
                                             size_k=size_k,
                                             size_n=size_n,
@@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size):
     weight_ref = weight_ref * global_scale.to(weight.dtype) * \
         scales.repeat_interleave(group_size, 1).to(weight.dtype)
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=fp4_weight.view(torch.int32).T.contiguous(),
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
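The marlin_utils.py and marlin_utils_fp4.py edits above replace absolute `import quantization as ops` imports with package-relative ones, so these helpers no longer depend on the package being importable under the fixed name `quantization`. For `from .. import ScalarType, gptq_marlin_gemm, scalar_types` to resolve, the package-level `__init__.py` must re-export those names; a rough sketch of the assumed shape (illustrative only, the actual `__init__.py` is not part of these hunks):

# quantization/__init__.py (assumed shape, not shown in this diff)
from .scalar_type import ScalarType, scalar_types
from ._ops import ops

# Re-export the custom kernels so submodules can use relative imports,
# e.g. `from .. import gptq_marlin_gemm, gptq_marlin_repack`.
def gptq_marlin_gemm(*args, **kwargs):
    return ops.gptq_marlin_gemm(*args, **kwargs)

def gptq_marlin_repack(*args, **kwargs):
    return ops.gptq_marlin_repack(*args, **kwargs)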
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp8.py
index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp8.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_fp8.py
@@ -5,7 +5,7 @@ from typing import Optional
 import torch
-import quantization as ops
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales
@@ -51,7 +51,7 @@ def apply_fp8_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                               c=None,
                               b_q_weight=weight,
                               b_scales=weight_scale,
@@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size):
     weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
     packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=packed_weight,
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test.py
index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test.py
@@ -5,8 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points
 from .quant_utils import (
     get_pack_factor,
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test_24.py
index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test_24.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/marlin_utils_test_24.py
@@ -6,8 +6,7 @@ from typing import List
 import numpy
 import torch
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils_test import marlin_weights
 from .quant_utils import gptq_quantize_weights
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/quant_utils.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/quant_utils.py
index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/quant_utils.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization/utils/quant_utils.py
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy
 import torch
-from quantization.scalar_type import ScalarType, scalar_types
+from ..scalar_type import ScalarType, scalar_types
 SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128]
 SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]
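The remaining utils modules (marlin_utils_fp8.py, the two test helpers, and quant_utils.py) apply the same refactor. A condensed before/after of the import pattern, mirroring the hunks above:

# Before: absolute imports; the package must be importable as `quantization`.
#   import quantization as ops
#   from quantization.scalar_type import ScalarType, scalar_types
#   output = ops.gptq_marlin_gemm(reshaped_x, ...)

# After: relative imports; resolved against the containing package,
# whatever name it is installed under.
#   from .. import gptq_marlin_gemm, scalar_types
#   from ..scalar_type import ScalarType
#   output = gptq_marlin_gemm(reshaped_x, ...)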