danieldk HF Staff committed on
Commit
6677800
·
1 Parent(s): 4c6b316

Enable ROCm build

Browse files
Files changed (2) hide show
  1. build.toml +52 -1
  2. flake.lock +6 -6
build.toml CHANGED
@@ -15,7 +15,25 @@ src = [
15
  "cuda-utils/cuda_utils_kernels.cu",
16
  ]
17
  depends = []
18
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  [kernel.paged_attention]
21
  backend = "cuda"
@@ -40,6 +58,39 @@ src = [
40
  include = [ "cuda-utils", "paged-attention" ]
41
  depends = [ "torch" ]
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  [kernel.paged_attention_metal]
45
  backend = "metal"
 
15
  "cuda-utils/cuda_utils_kernels.cu",
16
  ]
17
  depends = []
18
+
19
+ [kernel.cuda_utils_rocm]
20
+ backend = "rocm"
21
+ rocm-archs = [
22
+ "gfx906",
23
+ "gfx908",
24
+ "gfx90a",
25
+ "gfx940",
26
+ "gfx941",
27
+ "gfx942",
28
+ "gfx1030",
29
+ "gfx1100",
30
+ "gfx1101",
31
+ ]
32
+ src = [
33
+ "cuda-utils/cuda_utils.h",
34
+ "cuda-utils/cuda_utils_kernels.cu",
35
+ ]
36
+ depends = ["torch"]
37
 
38
  [kernel.paged_attention]
39
  backend = "cuda"
 
58
  include = [ "cuda-utils", "paged-attention" ]
59
  depends = [ "torch" ]
60
 
61
+ [kernel.paged_attention_rocm]
62
+ backend = "rocm"
63
+ rocm-archs = [
64
+ "gfx906",
65
+ "gfx908",
66
+ "gfx90a",
67
+ "gfx940",
68
+ "gfx941",
69
+ "gfx942",
70
+ "gfx1030",
71
+ "gfx1100",
72
+ "gfx1101",
73
+ ]
74
+ src = [
75
+ "cuda-utils/cuda_utils.h",
76
+ "paged-attention/attention/attention_dtypes.h",
77
+ "paged-attention/attention/attention_generic.cuh",
78
+ "paged-attention/attention/attention_kernels.cuh",
79
+ "paged-attention/attention/attention_utils.cuh",
80
+ "paged-attention/attention/dtype_bfloat16.cuh",
81
+ "paged-attention/attention/dtype_float16.cuh",
82
+ "paged-attention/attention/dtype_float32.cuh",
83
+ "paged-attention/attention/dtype_fp8.cuh",
84
+ "paged-attention/attention/paged_attention_v1.cu",
85
+ "paged-attention/attention/paged_attention_v2.cu",
86
+ "paged-attention/cache_kernels.cu",
87
+ "paged-attention/cuda_compat.h",
88
+ "paged-attention/dispatch_utils.h",
89
+ "paged-attention/quantization/fp8/amd/quant_utils.cuh",
90
+ "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
91
+ ]
92
+ include = [ "cuda-utils", "paged-attention" ]
93
+ depends = [ "torch" ]
94
 
95
  [kernel.paged_attention_metal]
96
  backend = "metal"
flake.lock CHANGED
@@ -73,11 +73,11 @@
73
  "nixpkgs": "nixpkgs"
74
  },
75
  "locked": {
76
- "lastModified": 1750234878,
77
- "narHash": "sha256-q9DRC9zdpzUf88qqg1qbhP1qgJbE2cMtn8oUmosuyT8=",
78
  "owner": "huggingface",
79
  "repo": "hf-nix",
80
- "rev": "c7132f90763d756da3e77da62e01be0a4546dc57",
81
  "type": "github"
82
  },
83
  "original": {
@@ -98,11 +98,11 @@
98
  ]
99
  },
100
  "locked": {
101
- "lastModified": 1750917308,
102
- "narHash": "sha256-/kRwI2GgYwhgFwFGZ/tOgQr1qdihidU89ngDviqxTtU=",
103
  "owner": "huggingface",
104
  "repo": "kernel-builder",
105
- "rev": "5fb8be4d148b5e4d0e2130998d02bafca71520c7",
106
  "type": "github"
107
  },
108
  "original": {
 
73
  "nixpkgs": "nixpkgs"
74
  },
75
  "locked": {
76
+ "lastModified": 1751968576,
77
+ "narHash": "sha256-cmKrlWpNTG/hq1bCaHXfbdm9T+Y6V+5//EHAVc1TLBE=",
78
  "owner": "huggingface",
79
  "repo": "hf-nix",
80
+ "rev": "3fcd1e1b46da91b6691261640ffd6b7123d0cb9e",
81
  "type": "github"
82
  },
83
  "original": {
 
98
  ]
99
  },
100
  "locked": {
101
+ "lastModified": 1753256281,
102
+ "narHash": "sha256-CfL3Fyf2ih7OtyL7ScZUCwOeCj+gjlRyPykhR6Zbt3I=",
103
  "owner": "huggingface",
104
  "repo": "kernel-builder",
105
+ "rev": "dcbbdf2d3c8e78b27321b205b2c9d67ffce6a706",
106
  "type": "github"
107
  },
108
  "original": {