Temporarily disable ROCm kernels (comment out [kernel.fp8_common_rocm] and [kernel.int8_common_rocm] in build.toml)
Browse files — build.toml (+48 -48)
build.toml
CHANGED
@@ -46,35 +46,35 @@ src = [
|
|
46 |
"gptq_marlin/marlin_template.h",
|
47 |
]
|
48 |
|
49 |
-
[kernel.fp8_common_rocm]
|
50 |
-
backend = "rocm"
|
51 |
-
depends = ["torch"]
|
52 |
-
rocm-archs = [
|
53 |
-
"gfx906",
|
54 |
-
"gfx908",
|
55 |
-
"gfx90a",
|
56 |
-
"gfx940",
|
57 |
-
"gfx941",
|
58 |
-
"gfx942",
|
59 |
-
"gfx1030",
|
60 |
-
"gfx1100",
|
61 |
-
"gfx1101",
|
62 |
-
]
|
63 |
-
include = ["."]
|
64 |
-
src = [
|
65 |
-
"attention/attention_dtypes.h",
|
66 |
-
"attention/attention_generic.cuh",
|
67 |
-
"attention/dtype_bfloat16.cuh",
|
68 |
-
"attention/dtype_float16.cuh",
|
69 |
-
"attention/dtype_float32.cuh",
|
70 |
-
"attention/dtype_fp8.cuh",
|
71 |
-
"fp8/amd/quant_utils.cuh",
|
72 |
-
"fp8/common.cu",
|
73 |
-
"fp8/common.cuh",
|
74 |
-
"dispatch_utils.h",
|
75 |
-
"utils.cuh",
|
76 |
-
"vectorization.cuh",
|
77 |
-
]
|
78 |
|
79 |
[kernel.int8_common]
|
80 |
backend = "cuda"
|
@@ -240,22 +240,22 @@ src = [
|
|
240 |
"marlin/sparse/marlin_24_cuda_kernel.cu",
|
241 |
]
|
242 |
|
243 |
-
[kernel.int8_common_rocm]
|
244 |
-
backend = "rocm"
|
245 |
-
depends = ["torch"]
|
246 |
-
rocm-archs = [
|
247 |
-
"gfx906",
|
248 |
-
"gfx908",
|
249 |
-
"gfx90a",
|
250 |
-
"gfx940",
|
251 |
-
"gfx941",
|
252 |
-
"gfx942",
|
253 |
-
"gfx1030",
|
254 |
-
"gfx1100",
|
255 |
-
"gfx1101",
|
256 |
-
]
|
257 |
-
include = ["."]
|
258 |
-
src = [
|
259 |
-
"compressed_tensors/int8_quant_kernels.cu",
|
260 |
-
"dispatch_utils.h",
|
261 |
-
]
|
|
|
46 |
"gptq_marlin/marlin_template.h",
|
47 |
]
|
48 |
|
49 |
+
#[kernel.fp8_common_rocm]
|
50 |
+
#backend = "rocm"
|
51 |
+
#depends = ["torch"]
|
52 |
+
#rocm-archs = [
|
53 |
+
# "gfx906",
|
54 |
+
# "gfx908",
|
55 |
+
# "gfx90a",
|
56 |
+
# "gfx940",
|
57 |
+
# "gfx941",
|
58 |
+
# "gfx942",
|
59 |
+
# "gfx1030",
|
60 |
+
# "gfx1100",
|
61 |
+
# "gfx1101",
|
62 |
+
#]
|
63 |
+
#include = ["."]
|
64 |
+
#src = [
|
65 |
+
# "attention/attention_dtypes.h",
|
66 |
+
# "attention/attention_generic.cuh",
|
67 |
+
# "attention/dtype_bfloat16.cuh",
|
68 |
+
# "attention/dtype_float16.cuh",
|
69 |
+
# "attention/dtype_float32.cuh",
|
70 |
+
# "attention/dtype_fp8.cuh",
|
71 |
+
# "fp8/amd/quant_utils.cuh",
|
72 |
+
# "fp8/common.cu",
|
73 |
+
# "fp8/common.cuh",
|
74 |
+
# "dispatch_utils.h",
|
75 |
+
# "utils.cuh",
|
76 |
+
# "vectorization.cuh",
|
77 |
+
#]
|
78 |
|
79 |
[kernel.int8_common]
|
80 |
backend = "cuda"
|
|
|
240 |
"marlin/sparse/marlin_24_cuda_kernel.cu",
|
241 |
]
|
242 |
|
243 |
+
#[kernel.int8_common_rocm]
|
244 |
+
#backend = "rocm"
|
245 |
+
#depends = ["torch"]
|
246 |
+
#rocm-archs = [
|
247 |
+
# "gfx906",
|
248 |
+
# "gfx908",
|
249 |
+
# "gfx90a",
|
250 |
+
# "gfx940",
|
251 |
+
# "gfx941",
|
252 |
+
# "gfx942",
|
253 |
+
# "gfx1030",
|
254 |
+
# "gfx1100",
|
255 |
+
# "gfx1101",
|
256 |
+
#]
|
257 |
+
#include = ["."]
|
258 |
+
#src = [
|
259 |
+
# "compressed_tensors/int8_quant_kernels.cu",
|
260 |
+
# "dispatch_utils.h",
|
261 |
+
#]
|