Enable ROCm build
build.toml  +52 -1
flake.lock   +6 -6
build.toml  CHANGED
@@ -15,7 +15,25 @@ src = [
     "cuda-utils/cuda_utils_kernels.cu",
 ]
 depends = []
-
+
+[kernel.cuda_utils_rocm]
+backend = "rocm"
+rocm-archs = [
+  "gfx906",
+  "gfx908",
+  "gfx90a",
+  "gfx940",
+  "gfx941",
+  "gfx942",
+  "gfx1030",
+  "gfx1100",
+  "gfx1101",
+]
+src = [
+  "cuda-utils/cuda_utils.h",
+  "cuda-utils/cuda_utils_kernels.cu",
+]
+depends = ["torch"]
 
 [kernel.paged_attention]
 backend = "cuda"
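The rocm-archs list spells out every AMD target this kernel is compiled for: gfx906 (MI50/Radeon VII), gfx908 (MI100), and gfx90a (MI200) cover the older CDNA data-center parts, gfx940/941/942 cover the MI300 series, and gfx1030 plus gfx1100/gfx1101 add RDNA2 and RDNA3 consumer GPUs. Note that the ROCm entry reuses the CUDA `.cu` sources unchanged; how that single-source setup typically works is sketched after the second hunk below.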
@@ -40,6 +58,39 @@ src = [
 include = [ "cuda-utils", "paged-attention" ]
 depends = [ "torch" ]
 
+[kernel.paged_attention_rocm]
+backend = "rocm"
+rocm-archs = [
+  "gfx906",
+  "gfx908",
+  "gfx90a",
+  "gfx940",
+  "gfx941",
+  "gfx942",
+  "gfx1030",
+  "gfx1100",
+  "gfx1101",
+]
+src = [
+  "cuda-utils/cuda_utils.h",
+  "paged-attention/attention/attention_dtypes.h",
+  "paged-attention/attention/attention_generic.cuh",
+  "paged-attention/attention/attention_kernels.cuh",
+  "paged-attention/attention/attention_utils.cuh",
+  "paged-attention/attention/dtype_bfloat16.cuh",
+  "paged-attention/attention/dtype_float16.cuh",
+  "paged-attention/attention/dtype_float32.cuh",
+  "paged-attention/attention/dtype_fp8.cuh",
+  "paged-attention/attention/paged_attention_v1.cu",
+  "paged-attention/attention/paged_attention_v2.cu",
+  "paged-attention/cache_kernels.cu",
+  "paged-attention/cuda_compat.h",
+  "paged-attention/dispatch_utils.h",
+  "paged-attention/quantization/fp8/amd/quant_utils.cuh",
+  "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
+]
+include = [ "cuda-utils", "paged-attention" ]
+depends = [ "torch" ]
 
 [kernel.paged_attention_metal]
 backend = "metal"
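Both new ROCm kernels point backend = "rocm" at the same .cu sources the CUDA kernels compile. That only works because the kernel code is written against a small compatibility layer, and paged-attention/cuda_compat.h in the src list is the natural home for it. The header itself is not shown in this diff, so what follows is only a minimal sketch of the kind of shim such a header typically provides, assuming the build defines USE_ROCM for the HIP backend (the macro name, SHFL_XOR, and warp_reduce_sum are illustrative, not taken from this repo):

// cuda_compat_sketch.h -- illustrative only, not the file from this repo.
// Bridges the small API gaps between CUDA and HIP so one .cu source tree
// can be compiled by both nvcc and the ROCm toolchain.

#pragma once

#ifdef USE_ROCM  // assumed backend define, set by the ROCm build
  #include <hip/hip_runtime.h>
#endif

// NVIDIA warps are 32 lanes. AMD CDNA wavefronts (gfx906..gfx942) are 64
// lanes, while RDNA parts (gfx1030, gfx1100, gfx1101) typically run 32-lane
// waves; HIP exposes the per-target width as the device constant warpSize.
#ifndef USE_ROCM
  #define WARP_SIZE 32
#else
  #define WARP_SIZE warpSize
#endif

// CUDA's *_sync shuffle variants take an explicit lane mask; HIP's classic
// shuffles operate on the whole wavefront, so the mask is dropped.
#ifndef USE_ROCM
  #define SHFL_XOR(var, lane_mask) __shfl_xor_sync(0xffffffffu, (var), (lane_mask))
#else
  #define SHFL_XOR(var, lane_mask) __shfl_xor((var), (lane_mask))
#endif

// Example use inside a kernel: a butterfly reduction over one warp/wave.
template <typename T>
__device__ inline T warp_reduce_sum(T val) {
  for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
    val += SHFL_XOR(val, offset);
  return val;
}

With the backend divergence confined to a shim like this, build.toml only has to vary the backend and arch list per entry while the kernel bodies stay single-source; kernel-builder presumably compiles the listed .cu files with the ROCm toolchain for each gfx target.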
flake.lock  CHANGED
@@ -73,11 +73,11 @@
       "nixpkgs": "nixpkgs"
     },
     "locked": {
-      "lastModified": …,
-      "narHash": "sha256-…",
+      "lastModified": 1751968576,
+      "narHash": "sha256-cmKrlWpNTG/hq1bCaHXfbdm9T+Y6V+5//EHAVc1TLBE=",
       "owner": "huggingface",
       "repo": "hf-nix",
-      "rev": "…",
+      "rev": "3fcd1e1b46da91b6691261640ffd6b7123d0cb9e",
       "type": "github"
     },
     "original": {
@@ -98,11 +98,11 @@
       ]
     },
     "locked": {
-      "lastModified": …,
-      "narHash": "sha256-…",
+      "lastModified": 1753256281,
+      "narHash": "sha256-CfL3Fyf2ih7OtyL7ScZUCwOeCj+gjlRyPykhR6Zbt3I=",
       "owner": "huggingface",
       "repo": "kernel-builder",
-      "rev": "…",
+      "rev": "dcbbdf2d3c8e78b27321b205b2c9d67ffce6a706",
       "type": "github"
     },
     "original": {
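The flake.lock changes are the matching toolchain bump: lastModified is a Unix timestamp, narHash pins the content hash of the fetched source tree, and rev pins the exact git commit of each input. Both the hf-nix and kernel-builder inputs move forward here, presumably to revisions whose ROCm support the new build.toml entries rely on.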