diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3cbf2e854ee3084e7f2f4681e2aa4e377943ea18 Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2263d79badb4dca805b4baeab0c377e35b9155f Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e58a76fbee916b5a82db699aaf3ec193914ef35b Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c84596411165092f8125be8b8ed7c6fbb0c387af Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1dbd958de0c46a9a851bb1123f4f819b414bf2b5 Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c537bef81e8bf9503050d41b0a6583d8f087ec37 Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6dbd52db71871b78d80f85ccf9143faeb92e585 Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py index 5f84ea61a2d0c3731d870d86900b212987b4edda..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_82ffd1f -ops = torch.ops._quantization_82ffd1f +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_quantization_82ffd1f::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx11-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..a8774a30c331b50c0f1ae077f41e900d985d19d7 --- /dev/null +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ee57d2c1cc0ad50da66137aba7bc19177808ab669b04af56724343ff91ead0 +size 159999664 diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so b/build/torch26-cxx11-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so deleted file mode 100755 index c8f8001de3e22efec5f5f693a3f6d3a7900766da..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:163383785e3ca9a472f18c802591218f18ef3c9cde4bb83fa623575a8adfd085 -size 159999656 diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a610a66b4052dfa29a6a4576bf33da9afb1892a5 Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44c55e6ac3a5b109ef1a2e9cfabe28be3a185f68 Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3ccdd61014ae241335a38d29f5f15f655d4fcbe Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a2e1839fac3685366e3e2bd1a2ea4ac7305ad51 Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7340602153936a03e468483df3a4735026a802c Binary files /dev/null and b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8872bc7f54d2408aac2b05dfbf25595755aa0358 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11cfeb865e5cb69f164ab143cf045e3b649d1cbb Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ee5ad7c4c9766a53aa9014bd7b2e069ac367926 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b54615be55f3396fa84576da713f5a6fde9ffc02 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1bbc2a3839c4ae9346eb0b31c78aaa4e6e94349c Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dbde436a327fb7d67064d7768c473099f43cb739 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55dc6f41c77bc942a97cca1352487c2f43d4a537 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py index 5f84ea61a2d0c3731d870d86900b212987b4edda..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_82ffd1f -ops = torch.ops._quantization_82ffd1f +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_quantization_82ffd1f::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so b/build/torch26-cxx98-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..26d06c1bdbb1b16fe26613651d6ce9c78f3a954b --- /dev/null +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f73550ec23c013a886c34cb3f24c5aa55ea2f20e957988ba114ecc207ecac9 +size 159991688 diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so b/build/torch26-cxx98-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so deleted file mode 100755 index 0f47ca1fc8221dd99ba865a978d83cc40951ff90..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f2bf0942eeeb2b821331211fc74ce7c37fccad95fc1ac6aa8bbc322a6f8ac249 -size 159991696 diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86cae4a7b0fca0f8ca419852641cdff904c00294 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d8e053f80342e1f2c06043126df85dca4d63765 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bfb35eba477fdecbce04a8baddb6b34b7473844 Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b22c5136d041a3b1c7984a41b24a4b5fafcf94a Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..636496c3e2edf534c229bb4a41d25e1cdf5714fd Binary files /dev/null and b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py +++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2724724f1044e6b212a165fe9e218eac9b28c70a Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..628bb1c4c24612ed1bb33c048d6ac75513a5f423 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1678e3dbc95d8e62c0f97abb9e74b921874654ca Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3638fcb14decf6e85e5fd94dd196118dfd8292f5 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cff4d0b2557f09e53705bde2dfe9a37e6abb0926 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..035bdc7ea6df36c36aba73b3e8a434381da99e3b Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfba49b61abe007b2735c6b2f89dd79a87292932 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py index 5f84ea61a2d0c3731d870d86900b212987b4edda..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_82ffd1f -ops = torch.ops._quantization_82ffd1f +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_quantization_82ffd1f::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so b/build/torch27-cxx11-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..ba2957d55863730a21550cc2eda9a7e17e99d79b --- /dev/null +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c937e095108ec91e92ffcc629e2a62ff931aa0899b430088ced81ae6fee8b7b4 +size 159999616 diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so b/build/torch27-cxx11-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so deleted file mode 100755 index 76546752d3d0681ddb01d0d0a777cb76ebbc66af..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ef0e68ff25982049ce0b6af570f6546c8f62a49e373397d352f89376c1805de4 -size 159934080 diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea792a83eeebc820a2abc16ac215d805272a1c03 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f94f0a154bd93e6399ecb4cd23420e634699a9a Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..229ed443f05e065ea2edb0c1d2cd2555654be9d8 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62dd55a6dbfbd4871cb87f5b541339dcf89d6ef5 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..754fed5fc9c919eb63dba2735a2e29815874b2d9 Binary files /dev/null and b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py +++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128] diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..caa19a7f506da177659d759ed798765313a6b757 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0a92b850265dcf6f3c692a7561a8b80e79afcb3 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3297794e9a9217f8e64f88c8691222102880c855 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b14857755637fe61006a36e7849d2cf92c653982 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a257d3363cca637e82b2483ebe6fffb619d7e5a Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..672cc31cbdb4e692938ce896f44ce6c636e9e2f4 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df4a7ddcccd770353c919b23696c705bb7c518ac Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/_ops.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization/_ops.py index 5f84ea61a2d0c3731d870d86900b212987b4edda..f607840072f0680e47fea4e204f29a7c1d4d5d43 100644 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/_ops.py +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _quantization_82ffd1f -ops = torch.ops._quantization_82ffd1f +from . import _quantization_3313895 +ops = torch.ops._quantization_3313895 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_quantization_82ffd1f::{op_name}" \ No newline at end of file + return f"_quantization_3313895::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/_quantization_3313895.abi3.so b/build/torch27-cxx11-cu128-aarch64-linux/quantization/_quantization_3313895.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..796a17e7d50ea09aaa3cec515ab0fc348be153d1 --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/_quantization_3313895.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d662349704d517c327bb1f10b7e14fbdd3cbfb6b19da035a528a72f4b977ff +size 296561248 diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so b/build/torch27-cxx11-cu128-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so deleted file mode 100755 index f8757d2478e3536c8080b3c2ae9cc710dfdf3fc8..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/_quantization_82ffd1f.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c8d85c6222df8ff6de82adbad94502fdc5c1910dbaa367034c8975c4f85244a -size 296561256 diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b234aaef659eaea728ba3478620e4b9cdc336f71 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8d833c2a8d3c0a5a7216e8111d13ce37ebaf045 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..562b894c86c525160893c60185fae3ca5bc9bb60 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42dee4e6bb3d17333cdac999bd9995f8901c0dd6 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5d2f777ec3603982291ba7deb79b5b4a0947b0c Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils.py index eb2f41d72984bdfbe03a6d71b632371025156448..d587f60d60f9e9f32410d3b12bed8c51fa8e6822 100644 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils.py +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils.py @@ -6,8 +6,7 @@ from typing import Optional import numpy import torch -import quantization as ops -from quantization.scalar_type import ScalarType, scalar_types +from .. import ScalarType, gptq_marlin_gemm, scalar_types from .quant_utils import pack_cols, unpack_cols @@ -383,7 +382,7 @@ def apply_gptq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, @@ -429,7 +428,7 @@ def apply_awq_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(reshaped_x, + output = gptq_marlin_gemm(reshaped_x, None, weight, weight_scale, diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp4.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp4.py index b6697e1394328f52681dd2b8870fe826d9be5ba3..44348f6491cba69fa04df9f3b09cafac24df0565 100644 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp4.py +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp4.py @@ -5,12 +5,11 @@ from typing import Optional import torch -import quantization as ops - +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import ( USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales, should_use_atomic_add_reduce) -from quantization.scalar_type import scalar_types +from ..scalar_type import scalar_types FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16] @@ -90,7 +89,7 @@ def apply_fp4_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: perm = torch.empty(0, dtype=torch.int, device=device) qweight = layer.weight.view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=part_size_k, size_n=part_size_n, @@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None: for i in range(e): qweight = weight[i].view(torch.int32).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight, + marlin_qweight = gptq_marlin_repack(b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, @@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size): weight_ref = weight_ref * global_scale.to(weight.dtype) * \ scales.repeat_interleave(group_size, 1).to(weight.dtype) - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=fp4_weight.view(torch.int32).T.contiguous(), perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp8.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp8.py index b38fe2d4aff0234cdbf08218da6440b4892e01f0..4ebd749729cfc3c8ada3a85b67b85a11b0f97a5d 100644 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp8.py +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_fp8.py @@ -5,7 +5,7 @@ from typing import Optional import torch -import quantization as ops +from .. import gptq_marlin_gemm, gptq_marlin_repack from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales @@ -51,7 +51,7 @@ def apply_fp8_marlin_linear( device=input.device, dtype=input.dtype) - output = ops.gptq_marlin_gemm(a=reshaped_x, + output = gptq_marlin_gemm(a=reshaped_x, c=None, b_q_weight=weight, b_scales=weight_scale, @@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size): weight_ref = fp8_weight.to(weight.dtype) * repeated_scales packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous() - marlin_qweight = ops.gptq_marlin_repack( + marlin_qweight = gptq_marlin_repack( b_q_weight=packed_weight, perm=torch.empty(0, dtype=torch.int, device=device), size_k=size_k, diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test.py index 7d4f5f3cfbb872bf7b32e0972d6143b43f354a5e..b03555f5bffa8d48187d37e53f17e6e3138472cc 100644 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test.py +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test.py @@ -5,8 +5,7 @@ from typing import List, Optional import numpy as np import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points from .quant_utils import ( get_pack_factor, diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test_24.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test_24.py index 927fa9016ba25f381c09d768db0c468066193a76..1c907245c66417ff8715f18474024a1c20c316a7 100644 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test_24.py +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/marlin_utils_test_24.py @@ -6,8 +6,7 @@ from typing import List import numpy import torch -from quantization.scalar_type import ScalarType - +from ..scalar_type import ScalarType from .marlin_utils_test import marlin_weights from .quant_utils import gptq_quantize_weights diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/quant_utils.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/quant_utils.py index d97e03913fa5980e0be73b160088c8e4f5f49a52..65c90821773b60cca2b0f9102ca7ad4e3cd009d5 100644 --- a/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/quant_utils.py +++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization/utils/quant_utils.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy import torch -from quantization.scalar_type import ScalarType, scalar_types +from ..scalar_type import ScalarType, scalar_types SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128] SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]