danieldk (HF Staff) committed
Commit 4dcf20d · 1 Parent(s): 3313895

Build (aarch64)

This view is limited to 50 files because it contains too many changes.

Files changed (50)
  1. build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
  2. build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
  3. build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
  4. build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
  5. build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
  6. build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
  7. build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
  8. build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py +3 -3
  9. build/torch26-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} +2 -2
  10. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  11. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc +0 -0
  12. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc +0 -0
  13. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc +0 -0
  14. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc +0 -0
  15. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py +3 -4
  16. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py +6 -7
  17. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py +3 -3
  18. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py +1 -2
  19. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py +1 -2
  20. build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py +1 -1
  21. build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
  22. build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
  23. build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
  24. build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
  25. build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
  26. build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
  27. build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
  28. build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py +3 -3
  29. build/torch26-cxx98-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} +2 -2
  30. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  31. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc +0 -0
  32. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc +0 -0
  33. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc +0 -0
  34. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc +0 -0
  35. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py +3 -4
  36. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py +6 -7
  37. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py +3 -3
  38. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py +1 -2
  39. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py +1 -2
  40. build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py +1 -1
  41. build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
  42. build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
  43. build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
  44. build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
  45. build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
  46. build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
  47. build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
  48. build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py +3 -3
  49. build/torch27-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} +2 -2
  50. build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.02 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (539 Bytes).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc ADDED
Binary file (5.33 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc ADDED
Binary file (3.88 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc ADDED
Binary file (7.9 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc ADDED
Binary file (5.75 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_82ffd1f
-ops = torch.ops._quantization_82ffd1f
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_82ffd1f::{op_name}"
+    return f"_quantization_3313895::{op_name}"
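
Note (for orientation, not part of the commit): the only change in _ops.py is the build hash baked into the native extension's module name (82ffd1f → 3313895). A minimal usage sketch of this shim, assuming the build directory is importable as `quantization` and that the extension registers an op such as gptq_marlin_gemm (the name appears in the utils diffs below; its signature is not part of this commit):

# Hypothetical consumer of the regenerated shim (assumed import path).
from quantization._ops import ops, add_op_namespace_prefix

# Fully qualified op name, e.g. for schema or fake-kernel registration helpers.
qualified = add_op_namespace_prefix("gptq_marlin_gemm")
assert qualified == "_quantization_3313895::gptq_marlin_gemm"

# The same kernel is reachable through the torch.ops proxy bound in _ops.py:
# out = ops.gptq_marlin_gemm(...)  # argument list defined by the native extension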
build/torch26-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:163383785e3ca9a472f18c802591218f18ef3c9cde4bb83fa623575a8adfd085
-size 159999656
+oid sha256:d3ee57d2c1cc0ad50da66137aba7bc19177808ab669b04af56724343ff91ead0
+size 159999664
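
The shared object itself is stored via Git LFS, so the rename only rewrites the pointer file (sha256 oid and size). A small, generic check that a locally downloaded blob matches such a pointer; the local path below is a placeholder, and the oid/size are copied from the new pointer above:

# Verify a downloaded artifact against a git-lfs spec/v1 pointer (sha256 oid + size).
import hashlib
import os

so_path = "_quantization_3313895.abi3.so"  # placeholder local path
expected_oid = "d3ee57d2c1cc0ad50da66137aba7bc19177808ab669b04af56724343ff91ead0"
expected_size = 159999664

digest = hashlib.sha256()
with open(so_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(so_path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("blob matches the LFS pointer")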
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (186 Bytes).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc ADDED
Binary file (17.6 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc ADDED
Binary file (11.9 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc ADDED
Binary file (5.31 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc ADDED
Binary file (20 kB).
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils.py CHANGED
@@ -6,8 +6,7 @@ from typing import Optional
 import numpy
 import torch
 
-import quantization as ops
-from quantization.scalar_type import ScalarType, scalar_types
+from .. import ScalarType, gptq_marlin_gemm, scalar_types
 
 from .quant_utils import pack_cols, unpack_cols
 
@@ -383,7 +382,7 @@ def apply_gptq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                                   None,
                                   weight,
                                   weight_scale,
@@ -429,7 +428,7 @@ def apply_awq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                                   None,
                                   weight,
                                   weight_scale,
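
Across the utils modules this commit follows one pattern: absolute `quantization.*` imports are replaced with package-relative imports, so the copied build directory resolves its own kernels regardless of how the top-level package is named or vendored. That only works if the package `__init__` re-exports the needed symbols; the following is a sketch of the surface those relative imports imply (the real `__init__.py` appears here only as a compiled .pyc, so this is an assumption, not the shipped code):

# Assumed shape of quantization/__init__.py, inferred from
# `from .. import ScalarType, gptq_marlin_gemm, scalar_types` in the utils diffs.
from ._ops import ops, add_op_namespace_prefix
from .scalar_type import ScalarType, scalar_types

# Expose the native kernels under the names the utils modules import.
gptq_marlin_gemm = ops.gptq_marlin_gemm
gptq_marlin_repack = ops.gptq_marlin_repack

__all__ = [
    "ScalarType",
    "scalar_types",
    "add_op_namespace_prefix",
    "gptq_marlin_gemm",
    "gptq_marlin_repack",
]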
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py CHANGED
@@ -5,12 +5,11 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
-
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import (
     USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales,
     should_use_atomic_add_reduce)
-from quantization.scalar_type import scalar_types
+from ..scalar_type import scalar_types
 
 FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16]
 
@@ -90,7 +89,7 @@ def apply_fp4_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                                   c=None,
                                   b_q_weight=weight,
                                   b_scales=weight_scale,
@@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     perm = torch.empty(0, dtype=torch.int, device=device)
     qweight = layer.weight.view(torch.int32).T.contiguous()
 
-    marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+    marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                             perm=perm,
                                             size_k=part_size_k,
                                             size_n=part_size_n,
@@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     for i in range(e):
         qweight = weight[i].view(torch.int32).T.contiguous()
 
-        marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+        marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                                 perm=perm,
                                                 size_k=size_k,
                                                 size_n=size_n,
@@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size):
     weight_ref = weight_ref * global_scale.to(weight.dtype) * \
         scales.repeat_interleave(group_size, 1).to(weight.dtype)
 
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=fp4_weight.view(torch.int32).T.contiguous(),
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 
 from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales
 
@@ -51,7 +51,7 @@ def apply_fp8_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                                   c=None,
                                   b_q_weight=weight,
                                   b_scales=weight_scale,
@@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size):
     weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
 
     packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=packed_weight,
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py CHANGED
@@ -5,8 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points
 from .quant_utils import (
     get_pack_factor,
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py CHANGED
@@ -6,8 +6,7 @@ from typing import List
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils_test import marlin_weights
 from .quant_utils import gptq_quantize_weights
 
build/torch26-cxx11-cu126-aarch64-linux/quantization/utils/quant_utils.py CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType, scalar_types
+from ..scalar_type import ScalarType, scalar_types
 
 SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128]
 SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.02 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (539 Bytes).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc ADDED
Binary file (5.33 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc ADDED
Binary file (3.88 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc ADDED
Binary file (7.9 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc ADDED
Binary file (5.75 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_82ffd1f
-ops = torch.ops._quantization_82ffd1f
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_82ffd1f::{op_name}"
+    return f"_quantization_3313895::{op_name}"
build/torch26-cxx98-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2bf0942eeeb2b821331211fc74ce7c37fccad95fc1ac6aa8bbc322a6f8ac249
-size 159991696
+oid sha256:61f73550ec23c013a886c34cb3f24c5aa55ea2f20e957988ba114ecc207ecac9
+size 159991688
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (186 Bytes).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc ADDED
Binary file (17.6 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc ADDED
Binary file (11.9 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc ADDED
Binary file (5.31 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc ADDED
Binary file (20 kB).
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils.py CHANGED
@@ -6,8 +6,7 @@ from typing import Optional
 import numpy
 import torch
 
-import quantization as ops
-from quantization.scalar_type import ScalarType, scalar_types
+from .. import ScalarType, gptq_marlin_gemm, scalar_types
 
 from .quant_utils import pack_cols, unpack_cols
 
@@ -383,7 +382,7 @@ def apply_gptq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                                   None,
                                   weight,
                                   weight_scale,
@@ -429,7 +428,7 @@ def apply_awq_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                                   None,
                                   weight,
                                   weight_scale,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp4.py CHANGED
@@ -5,12 +5,11 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
-
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import (
     USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales,
     should_use_atomic_add_reduce)
-from quantization.scalar_type import scalar_types
+from ..scalar_type import scalar_types
 
 FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16]
 
@@ -90,7 +89,7 @@ def apply_fp4_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                                   c=None,
                                   b_q_weight=weight,
                                   b_scales=weight_scale,
@@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     perm = torch.empty(0, dtype=torch.int, device=device)
     qweight = layer.weight.view(torch.int32).T.contiguous()
 
-    marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+    marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                             perm=perm,
                                             size_k=part_size_k,
                                             size_n=part_size_n,
@@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     for i in range(e):
         qweight = weight[i].view(torch.int32).T.contiguous()
 
-        marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+        marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                                                 perm=perm,
                                                 size_k=size_k,
                                                 size_n=size_n,
@@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size):
     weight_ref = weight_ref * global_scale.to(weight.dtype) * \
         scales.repeat_interleave(group_size, 1).to(weight.dtype)
 
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=fp4_weight.view(torch.int32).T.contiguous(),
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_fp8.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 
 from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales
 
@@ -51,7 +51,7 @@ def apply_fp8_marlin_linear(
                          device=input.device,
                          dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                                   c=None,
                                   b_q_weight=weight,
                                   b_scales=weight_scale,
@@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size):
     weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
 
     packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=packed_weight,
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test.py CHANGED
@@ -5,8 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points
 from .quant_utils import (
     get_pack_factor,
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/marlin_utils_test_24.py CHANGED
@@ -6,8 +6,7 @@ from typing import List
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils_test import marlin_weights
 from .quant_utils import gptq_quantize_weights
 
build/torch26-cxx98-cu126-aarch64-linux/quantization/utils/quant_utils.py CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType, scalar_types
+from ..scalar_type import ScalarType, scalar_types
 
 SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128]
 SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.02 kB).
 
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (539 Bytes).
 
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc ADDED
Binary file (5.33 kB).
 
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/cutlass.cpython-312.pyc ADDED
Binary file (3.88 kB).
 
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/marlin.cpython-312.pyc ADDED
Binary file (7.9 kB).
 
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/platforms.cpython-312.pyc ADDED
Binary file (5.75 kB).
 
build/torch27-cxx11-cu126-aarch64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
build/torch27-cxx11-cu126-aarch64-linux/quantization/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_82ffd1f
-ops = torch.ops._quantization_82ffd1f
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_82ffd1f::{op_name}"
+    return f"_quantization_3313895::{op_name}"
build/torch27-cxx11-cu126-aarch64-linux/quantization/{_quantization_82ffd1f.abi3.so → _quantization_3313895.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef0e68ff25982049ce0b6af570f6546c8f62a49e373397d352f89376c1805de4
-size 159934080
+oid sha256:c937e095108ec91e92ffcc629e2a62ff931aa0899b430088ced81ae6fee8b7b4
+size 159999616
build/torch27-cxx11-cu126-aarch64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (186 Bytes).