Build (aarch64)
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py +3 -3
- build/torch26-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} +2 -2
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py +3 -4
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py +6 -7
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py +3 -3
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py +1 -2
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py +1 -2
- build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py +1 -1
- build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py +3 -3
- build/torch26-cxx98-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} +2 -2
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py +3 -4
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py +6 -7
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py +3 -3
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py +1 -2
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py +1 -2
- build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py +1 -1
- build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
- build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
- build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
- build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
- build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
- build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py +3 -3
- build/torch27-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} +2 -2
- build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (1.02 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc
ADDED
Binary file (539 Bytes)

build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc
ADDED
Binary file (5.33 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc
ADDED
Binary file (3.88 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc
ADDED
Binary file (7.9 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc
ADDED
Binary file (5.75 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc
ADDED
Binary file (14.2 kB)
build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_82ffd1f
-ops = torch.ops._quantization_82ffd1f
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_82ffd1f::{op_name}"
+    return f"_quantization_3313895::{op_name}"
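For context, the rewritten `_ops.py` above binds the Python package to the hash-suffixed compiled extension. Below is a minimal usage sketch, not part of this commit: only `ops` and `add_op_namespace_prefix` come from the diff, while the import path, the op name, and the call shape are illustrative assumptions.

```python
# Minimal sketch (assumptions: the package is importable as `quantization` and
# the compiled extension exposes a `gptq_marlin_gemm` kernel; neither is spelled
# out in this diff).
from quantization._ops import add_op_namespace_prefix, ops

# Fully qualified schema name, e.g. for torch.library registrations:
qualified_name = add_op_namespace_prefix("gptq_marlin_gemm")
# -> "_quantization_3313895::gptq_marlin_gemm"

# The same kernel is reachable as an attribute of the ops handle:
# output = ops.gptq_marlin_gemm(a, c, b_q_weight, b_scales, ...)
print(qualified_name)
```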
build/torch26-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d3ee57d2c1cc0ad50da66137aba7bc19177808ab669b04af56724343ff91ead0
+size 159999664
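The renamed shared object is stored as a Git LFS pointer; the oid and size above identify the actual binary. A hypothetical local check that a fetched artifact matches that pointer is sketched below (the path, oid, and size are taken from the hunk above; the check itself is not part of the commit).

```python
# Sketch: verify a locally checked-out LFS object against the pointer above.
import hashlib
import os

path = "build/torch26-cxx11-cu126-aarch64-linux/quantization/_quantization_3313895.abi3.so"
expected_oid = "d3ee57d2c1cc0ad50da66137aba7bc19177808ab669b04af56724343ff91ead0"
expected_size = 159999664

h = hashlib.sha256()
with open(path, "rb") as f:
    # Hash in 1 MiB chunks to avoid loading the ~160 MB binary at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
```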
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (186 Bytes)

build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc
ADDED
Binary file (17.6 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc
ADDED
Binary file (11.9 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc
ADDED
Binary file (5.31 kB)

build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc
ADDED
Binary file (20 kB)
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py
CHANGED
@@ -6,8 +6,7 @@ from typing import Optional
 import numpy
 import torch
 
-import
-from quantization.scalar_type import ScalarType, scalar_types
+from .. import ScalarType, gptq_marlin_gemm, scalar_types
 
 from .quant_utils import pack_cols, unpack_cols
 
@@ -383,7 +382,7 @@ def apply_gptq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
@@ -429,7 +428,7 @@ def apply_awq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
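The hunks above replace absolute `quantization.*` imports with relative imports from the package root (`from .. import ScalarType, gptq_marlin_gemm, scalar_types`). For those to resolve, the package `__init__.py` has to re-export these names; that file is not shown in this 50-file view, so the sketch below is only an assumption about its shape, not its actual contents.

```python
# Hypothetical quantization/__init__.py re-exports (assumed, not part of this
# diff) that would satisfy the relative imports used by utils/marlin_utils*.py.
from ._ops import add_op_namespace_prefix, ops
from .scalar_type import ScalarType, scalar_types

# Kernel entry points re-exported at package level; here simply aliased to the
# compiled ops, though the real package may wrap them in Python helpers.
gptq_marlin_gemm = ops.gptq_marlin_gemm
gptq_marlin_repack = ops.gptq_marlin_repack

__all__ = [
    "ScalarType",
    "scalar_types",
    "gptq_marlin_gemm",
    "gptq_marlin_repack",
    "add_op_namespace_prefix",
    "ops",
]
```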
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py
CHANGED
@@ -5,12 +5,11 @@ from typing import Optional
 
 import torch
 
-import
-
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import (
     USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales,
     should_use_atomic_add_reduce)
-from
+from ..scalar_type import scalar_types
 
 FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16]
 
@@ -90,7 +89,7 @@ def apply_fp4_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     perm = torch.empty(0, dtype=torch.int, device=device)
     qweight = layer.weight.view(torch.int32).T.contiguous()
 
-    marlin_qweight =
+    marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                         perm=perm,
                                         size_k=part_size_k,
                                         size_n=part_size_n,
@@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     for i in range(e):
         qweight = weight[i].view(torch.int32).T.contiguous()
 
-        marlin_qweight =
+        marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                             perm=perm,
                                             size_k=size_k,
                                             size_n=size_n,
@@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size):
     weight_ref = weight_ref * global_scale.to(weight.dtype) * \
         scales.repeat_interleave(group_size, 1).to(weight.dtype)
 
-    marlin_qweight =
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=fp4_weight.view(torch.int32).T.contiguous(),
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 
 import torch
 
-import
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 
 from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales
 
@@ -51,7 +51,7 @@ def apply_fp8_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size):
     weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
 
     packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
-    marlin_qweight =
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=packed_weight,
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py
CHANGED
@@ -5,8 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 
-from
-
+from ..scalar_type import ScalarType
 from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points
 from .quant_utils import (
     get_pack_factor,
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py
CHANGED
@@ -6,8 +6,7 @@ from typing import List
 import numpy
 import torch
 
-from
-
+from ..scalar_type import ScalarType
 from .marlin_utils_test import marlin_weights
 from .quant_utils import gptq_quantize_weights
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py
CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy
 import torch
 
-from
+from ..scalar_type import ScalarType, scalar_types
 
 SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128]
 SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]
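The constants visible in the hunk above gate which GPTQ configurations the Marlin path accepts. A small illustrative check built only from names shown in this diff follows; the helper function and the import path (inferred from the build layout) are assumptions, not part of the commit.

```python
# Illustrative only: the constants are imported from the module changed above;
# is_marlin_compatible() is a made-up helper for clarity.
from quantization.utils.quant_utils import (SUPPORTED_GPTQ_QUANT_TYPES,
                                            SUPPORTED_GROUP_SIZES)


def is_marlin_compatible(quant_type, group_size: int) -> bool:
    """True when the (quant type, group size) pair is in the supported sets."""
    return (quant_type in SUPPORTED_GPTQ_QUANT_TYPES
            and group_size in SUPPORTED_GROUP_SIZES)
```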
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (1.02 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc
ADDED
Binary file (539 Bytes)

build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc
ADDED
Binary file (5.33 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc
ADDED
Binary file (3.88 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc
ADDED
Binary file (7.9 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc
ADDED
Binary file (5.75 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc
ADDED
Binary file (14.2 kB)
build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_82ffd1f
-ops = torch.ops._quantization_82ffd1f
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_82ffd1f::{op_name}"
+    return f"_quantization_3313895::{op_name}"
build/torch26-cxx98-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:61f73550ec23c013a886c34cb3f24c5aa55ea2f20e957988ba114ecc207ecac9
+size 159991688
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (186 Bytes)

build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc
ADDED
Binary file (17.6 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc
ADDED
Binary file (11.9 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc
ADDED
Binary file (5.31 kB)

build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc
ADDED
Binary file (20 kB)
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py
CHANGED
@@ -6,8 +6,7 @@ from typing import Optional
 import numpy
 import torch
 
-import
-from quantization.scalar_type import ScalarType, scalar_types
+from .. import ScalarType, gptq_marlin_gemm, scalar_types
 
 from .quant_utils import pack_cols, unpack_cols
 
@@ -383,7 +382,7 @@ def apply_gptq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
@@ -429,7 +428,7 @@ def apply_awq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py
CHANGED
@@ -5,12 +5,11 @@ from typing import Optional
 
 import torch
 
-import
-
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import (
     USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales,
     should_use_atomic_add_reduce)
-from
+from ..scalar_type import scalar_types
 
 FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16]
 
@@ -90,7 +89,7 @@ def apply_fp4_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     perm = torch.empty(0, dtype=torch.int, device=device)
     qweight = layer.weight.view(torch.int32).T.contiguous()
 
-    marlin_qweight =
+    marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                         perm=perm,
                                         size_k=part_size_k,
                                         size_n=part_size_n,
@@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     for i in range(e):
         qweight = weight[i].view(torch.int32).T.contiguous()
 
-        marlin_qweight =
+        marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                             perm=perm,
                                             size_k=size_k,
                                             size_n=size_n,
@@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size):
     weight_ref = weight_ref * global_scale.to(weight.dtype) * \
         scales.repeat_interleave(group_size, 1).to(weight.dtype)
 
-    marlin_qweight =
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=fp4_weight.view(torch.int32).T.contiguous(),
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 
 import torch
 
-import
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 
 from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales
 
@@ -51,7 +51,7 @@ def apply_fp8_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output =
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size):
     weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
 
     packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
-    marlin_qweight =
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=packed_weight,
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py
CHANGED
@@ -5,8 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 
-from
-
+from ..scalar_type import ScalarType
 from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points
 from .quant_utils import (
     get_pack_factor,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py
CHANGED
@@ -6,8 +6,7 @@ from typing import List
 import numpy
 import torch
 
-from
-
+from ..scalar_type import ScalarType
 from .marlin_utils_test import marlin_weights
 from .quant_utils import gptq_quantize_weights
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py
CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy
 import torch
 
-from
+from ..scalar_type import ScalarType, scalar_types
 
 SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128]
 SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (1.02 kB)

build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc
ADDED
Binary file (539 Bytes)

build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc
ADDED
Binary file (5.33 kB)

build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc
ADDED
Binary file (3.88 kB)

build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc
ADDED
Binary file (7.9 kB)

build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc
ADDED
Binary file (5.75 kB)

build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc
ADDED
Binary file (14.2 kB)
build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_82ffd1f
-ops = torch.ops._quantization_82ffd1f
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_82ffd1f::{op_name}"
+    return f"_quantization_3313895::{op_name}"
build/torch27-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c937e095108ec91e92ffcc629e2a62ff931aa0899b430088ced81ae6fee8b7b4
+size 159999616
build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (186 Bytes)