danieldk (HF Staff) committed
Commit b65f8ab · Parent: 4dcf20d

Build (x86_64)

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
  2. build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
  3. build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
  4. build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
  5. build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
  6. build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
  7. build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
  8. build/torch26-cxx11-cu118-x86_64-linux/quantization/_ops.py +3 -3
  9. build/torch26-cxx11-cu118-x86_64-linux/quantization/{_quantization_dfa7d18.abi3.so → _quantization_3313895.abi3.so} +2 -2
  10. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  11. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc +0 -0
  12. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc +0 -0
  13. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc +0 -0
  14. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc +0 -0
  15. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py +3 -4
  16. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py +6 -7
  17. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py +3 -3
  18. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py +1 -2
  19. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py +1 -2
  20. build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py +1 -1
  21. build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
  22. build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
  23. build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
  24. build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
  25. build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
  26. build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
  27. build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
  28. build/torch26-cxx11-cu124-x86_64-linux/quantization/_ops.py +3 -3
  29. build/torch26-cxx11-cu124-x86_64-linux/quantization/{_quantization_dfa7d18.abi3.so → _quantization_3313895.abi3.so} +2 -2
  30. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  31. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc +0 -0
  32. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc +0 -0
  33. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc +0 -0
  34. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc +0 -0
  35. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils.py +3 -4
  36. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py +6 -7
  37. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py +3 -3
  38. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py +1 -2
  39. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py +1 -2
  40. build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/quant_utils.py +1 -1
  41. build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc +0 -0
  42. build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc +0 -0
  43. build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc +0 -0
  44. build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc +0 -0
  45. build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc +0 -0
  46. build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc +0 -0
  47. build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc +0 -0
  48. build/torch26-cxx11-cu126-x86_64-linux/quantization/_ops.py +3 -3
  49. build/torch26-cxx11-cu126-x86_64-linux/quantization/{_quantization_dfa7d18.abi3.so → _quantization_3313895.abi3.so} +2 -2
  50. build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc +0 -0
build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.02 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (538 Bytes).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc ADDED
Binary file (5.33 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc ADDED
Binary file (3.88 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc ADDED
Binary file (7.9 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc ADDED
Binary file (5.75 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_dfa7d18
-ops = torch.ops._quantization_dfa7d18
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_dfa7d18::{op_name}"
+    return f"_quantization_3313895::{op_name}"
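The only change to _ops.py is the hashed extension module name (the suffix tracks the build), so callers keep resolving kernels through torch.ops without hard-coding that hash. A minimal usage sketch, assuming this build directory is importable as the package "quantization" and using gptq_marlin_gemm purely as an illustrative op name:

    # Sketch only; the package name and the op name are assumptions.
    from quantization._ops import ops, add_op_namespace_prefix

    # `ops` aliases torch.ops._quantization_3313895, so kernels registered by the
    # native extension are reachable as attributes, e.g. ops.gptq_marlin_gemm.
    print(add_op_namespace_prefix("gptq_marlin_gemm"))
    # -> "_quantization_3313895::gptq_marlin_gemm"
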
build/torch26-cxx11-cu118-x86_64-linux/quantization/{_quantization_dfa7d18.abi3.so → _quantization_3313895.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05b3dbcc1c3200458ec526bc95169a8b286704dbbfe93b1b5bb580d490be4f3d
-size 155751904
+oid sha256:8234b0e279e6da2eb6bfe5a6a88635365f861d8a8cbe2a1c3340f507c37ca487
+size 155756104
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (185 Bytes).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc ADDED
Binary file (17.6 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc ADDED
Binary file (11.9 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc ADDED
Binary file (5.31 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc ADDED
Binary file (20 kB).
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils.py CHANGED
@@ -6,8 +6,7 @@ from typing import Optional
 import numpy
 import torch
 
-import quantization as ops
-from quantization.scalar_type import ScalarType, scalar_types
+from .. import ScalarType, gptq_marlin_gemm, scalar_types
 
 from .quant_utils import pack_cols, unpack_cols
 
@@ -383,7 +382,7 @@ def apply_gptq_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
@@ -429,7 +428,7 @@ def apply_awq_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
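The marlin_utils change (mirrored in the utils modules that follow) swaps absolute "import quantization" imports for package-relative ones, so the helpers no longer depend on the literal top-level package name and pick the kernel entry points up from the package itself. A hedged sketch of what that relies on, assuming the build is importable as "quantization":

    # Sketch only: relies on the top-level package re-exporting the names used
    # in "from .. import ScalarType, gptq_marlin_gemm, scalar_types" above.
    import quantization
    from quantization.utils import marlin_utils

    # The relative import binds the same objects as the package-level exports.
    assert marlin_utils.gptq_marlin_gemm is quantization.gptq_marlin_gemm
    assert marlin_utils.scalar_types is quantization.scalar_types
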
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp4.py CHANGED
@@ -5,12 +5,11 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
-
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import (
     USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales,
     should_use_atomic_add_reduce)
-from quantization.scalar_type import scalar_types
+from ..scalar_type import scalar_types
 
 FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16]
 
@@ -90,7 +89,7 @@ def apply_fp4_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     perm = torch.empty(0, dtype=torch.int, device=device)
     qweight = layer.weight.view(torch.int32).T.contiguous()
 
-    marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+    marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                              perm=perm,
                              size_k=part_size_k,
                              size_n=part_size_n,
@@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     for i in range(e):
         qweight = weight[i].view(torch.int32).T.contiguous()
 
-        marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+        marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                              perm=perm,
                              size_k=size_k,
                              size_n=size_n,
@@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size):
     weight_ref = weight_ref * global_scale.to(weight.dtype) * \
         scales.repeat_interleave(group_size, 1).to(weight.dtype)
 
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=fp4_weight.view(torch.int32).T.contiguous(),
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_fp8.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 
 from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales
 
@@ -51,7 +51,7 @@ def apply_fp8_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size):
     weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
 
     packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=packed_weight,
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test.py CHANGED
@@ -5,8 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points
 from .quant_utils import (
     get_pack_factor,
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/marlin_utils_test_24.py CHANGED
@@ -6,8 +6,7 @@ from typing import List
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils_test import marlin_weights
 from .quant_utils import gptq_quantize_weights
 
build/torch26-cxx11-cu118-x86_64-linux/quantization/utils/quant_utils.py CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType, scalar_types
+from ..scalar_type import ScalarType, scalar_types
 
 SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128]
 SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]
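quant_utils.py only changes its ScalarType import; the supported-configuration constants are untouched. For illustration, a small hypothetical check built on the constants shown above (the helper name is not part of the package):

    # Hypothetical helper; the constants come from quant_utils.py as shown above.
    from quantization.scalar_type import scalar_types
    from quantization.utils.quant_utils import (
        SUPPORTED_GPTQ_QUANT_TYPES, SUPPORTED_GROUP_SIZES)

    def is_supported_gptq_config(quant_type, group_size) -> bool:
        return (quant_type in SUPPORTED_GPTQ_QUANT_TYPES
                and group_size in SUPPORTED_GROUP_SIZES)

    print(is_supported_gptq_config(scalar_types.uint4b8, 128))  # True
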
build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.02 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (538 Bytes).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc ADDED
Binary file (5.33 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc ADDED
Binary file (3.88 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc ADDED
Binary file (7.9 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc ADDED
Binary file (5.75 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_dfa7d18
-ops = torch.ops._quantization_dfa7d18
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_dfa7d18::{op_name}"
+    return f"_quantization_3313895::{op_name}"
build/torch26-cxx11-cu124-x86_64-linux/quantization/{_quantization_dfa7d18.abi3.so → _quantization_3313895.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5dc49e9b5709f18d3e12ab2d76e37743c31cb2602d219e80173a9c5c0ba1acd
-size 159574040
+oid sha256:70790ec67aaa48046db424362f76724cf70ecc91bf479c88da71ea6592bb637f
+size 159578136
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (185 Bytes).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc ADDED
Binary file (17.6 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp4.cpython-312.pyc ADDED
Binary file (11.9 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc ADDED
Binary file (5.31 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/__pycache__/quant_utils.cpython-312.pyc ADDED
Binary file (20 kB).
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils.py CHANGED
@@ -6,8 +6,7 @@ from typing import Optional
 import numpy
 import torch
 
-import quantization as ops
-from quantization.scalar_type import ScalarType, scalar_types
+from .. import ScalarType, gptq_marlin_gemm, scalar_types
 
 from .quant_utils import pack_cols, unpack_cols
 
@@ -383,7 +382,7 @@ def apply_gptq_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
@@ -429,7 +428,7 @@ def apply_awq_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(reshaped_x,
+    output = gptq_marlin_gemm(reshaped_x,
                              None,
                              weight,
                              weight_scale,
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp4.py CHANGED
@@ -5,12 +5,11 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
-
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 from .marlin_utils import (
     USE_FP32_REDUCE_DEFAULT, marlin_make_workspace_new, marlin_permute_scales,
     should_use_atomic_add_reduce)
-from quantization.scalar_type import scalar_types
+from ..scalar_type import scalar_types
 
 FP4_MARLIN_SUPPORTED_GROUP_SIZES = [16]
 
@@ -90,7 +89,7 @@ def apply_fp4_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -135,7 +134,7 @@ def prepare_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     perm = torch.empty(0, dtype=torch.int, device=device)
     qweight = layer.weight.view(torch.int32).T.contiguous()
 
-    marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+    marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                              perm=perm,
                              size_k=part_size_k,
                              size_n=part_size_n,
@@ -192,7 +191,7 @@ def prepare_moe_fp4_layer_for_marlin(layer: torch.nn.Module) -> None:
     for i in range(e):
         qweight = weight[i].view(torch.int32).T.contiguous()
 
-        marlin_qweight = ops.gptq_marlin_repack(b_q_weight=qweight,
+        marlin_qweight = gptq_marlin_repack(b_q_weight=qweight,
                              perm=perm,
                              size_k=size_k,
                              size_n=size_n,
@@ -263,7 +262,7 @@ def rand_marlin_weight_fp4_like(weight, group_size):
     weight_ref = weight_ref * global_scale.to(weight.dtype) * \
         scales.repeat_interleave(group_size, 1).to(weight.dtype)
 
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=fp4_weight.view(torch.int32).T.contiguous(),
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_fp8.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 
 import torch
 
-import quantization as ops
+from .. import gptq_marlin_gemm, gptq_marlin_repack
 
 from .marlin_utils import USE_FP32_REDUCE_DEFAULT, marlin_make_workspace, marlin_permute_scales
 
@@ -51,7 +51,7 @@ def apply_fp8_marlin_linear(
                              device=input.device,
                              dtype=input.dtype)
 
-    output = ops.gptq_marlin_gemm(a=reshaped_x,
+    output = gptq_marlin_gemm(a=reshaped_x,
                              c=None,
                              b_q_weight=weight,
                              b_scales=weight_scale,
@@ -104,7 +104,7 @@ def marlin_quant_fp8_torch(weight, group_size):
     weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
 
     packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
-    marlin_qweight = ops.gptq_marlin_repack(
+    marlin_qweight = gptq_marlin_repack(
         b_q_weight=packed_weight,
         perm=torch.empty(0, dtype=torch.int, device=device),
         size_k=size_k,
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test.py CHANGED
@@ -5,8 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils import GPTQ_MARLIN_TILE, marlin_permute_scales, marlin_zero_points
 from .quant_utils import (
     get_pack_factor,
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/marlin_utils_test_24.py CHANGED
@@ -6,8 +6,7 @@ from typing import List
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType
-
+from ..scalar_type import ScalarType
 from .marlin_utils_test import marlin_weights
 from .quant_utils import gptq_quantize_weights
 
build/torch26-cxx11-cu124-x86_64-linux/quantization/utils/quant_utils.py CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy
 import torch
 
-from quantization.scalar_type import ScalarType, scalar_types
+from ..scalar_type import ScalarType, scalar_types
 
 SUPPORTED_GPTQ_QUANT_TYPES = [scalar_types.uint4b8, scalar_types.uint8b128]
 SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128]
build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.02 kB).
 
build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (538 Bytes).
 
build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/compressed_tensors.cpython-312.pyc ADDED
Binary file (5.33 kB).
 
build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/cutlass.cpython-312.pyc ADDED
Binary file (3.88 kB).
 
build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/marlin.cpython-312.pyc ADDED
Binary file (7.9 kB).
 
build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/platforms.cpython-312.pyc ADDED
Binary file (5.75 kB).
 
build/torch26-cxx11-cu126-x86_64-linux/quantization/__pycache__/scalar_type.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
build/torch26-cxx11-cu126-x86_64-linux/quantization/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _quantization_dfa7d18
-ops = torch.ops._quantization_dfa7d18
+from . import _quantization_3313895
+ops = torch.ops._quantization_3313895
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_quantization_dfa7d18::{op_name}"
+    return f"_quantization_3313895::{op_name}"
build/torch26-cxx11-cu126-x86_64-linux/quantization/{_quantization_dfa7d18.abi3.so → _quantization_3313895.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af7fad3054f0981d175aa7dcabf9dbe3c556ba0dcee7f20a2c104abd17dce7a5
-size 160280624
+oid sha256:c33c0b9d3ba9e713cdb661ee669e95379212181902123b84161cf84dee599bca
+size 160276536
build/torch26-cxx11-cu126-x86_64-linux/quantization/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (185 Bytes).