diff --git a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_ops.py b/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
deleted file mode 100644
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
diff --git a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 2a5dbb93120de616632fc07ca35703ced6657194..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:353c8fd8c18d99439153c6db3dc20b5f1834d6a6ba64e9d8f9a03cc5c196d2c2
-size 91244024
diff --git a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py b/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
deleted file mode 100644
index aa06132e74cd7fb634044a76e528979b02a3559b..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import os
-import random
-from abc import ABC, abstractmethod
-from functools import lru_cache, wraps
-from typing import Callable, ParamSpec, TypeVar
-
-import numpy as np
-import torch
-
-IS_ROCM = torch.version.hip is not None
-
-
-class Platform(ABC):
-    @classmethod
-    def seed_everything(cls, seed: int) -> None:
-        """
-        Set the seed of each random module.
-        `torch.manual_seed` will set seed on all devices.
-
-        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-        """
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-    @abstractmethod
-    def get_device_name(self, device_id: int = 0) -> str: ...
-
-    @abstractmethod
-    def is_cuda(self) -> bool: ...
-
-    @abstractmethod
-    def is_rocm(self) -> bool: ...
-
-
-class CudaPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(0)
-
-    def is_cuda(self) -> bool:
-        return True
-
-    def is_rocm(self) -> bool:
-        return False
-
-
-class RocmPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return True
-
-
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
diff --git a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py b/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py
deleted file mode 100644
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
diff --git a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 0346ee79cb331560792eddd5b39e67c1107787da..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2156391e81ab2d76386131056f595c50c00f0d68c2427ab0872a457385f3c804
-size 87948952
diff --git a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py b/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py
deleted file mode 100644
index 9de56043369487facc1f163df6bd319c9806e5ca..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]
diff --git a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py b/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py
deleted file mode 100644
index a0c0b8db085468dee5100c98d14106a9ee917bf2..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from ._ops import ops
-
-
-# page attention ops
-def paged_attention_v1(
-    out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v1(
-        out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def paged_attention_v2(
-    out: torch.Tensor,
-    exp_sum: torch.Tensor,
-    max_logits: torch.Tensor,
-    tmp_out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v2(
-        out,
-        exp_sum,
-        max_logits,
-        tmp_out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def reshape_and_cache(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-) -> None:
-    ops.reshape_and_cache(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def reshape_and_cache_flash(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: torch.Tensor,
-    v_scale: torch.Tensor,
-) -> None:
-    ops.reshape_and_cache_flash(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def copy_blocks(
-    key_caches: List[torch.Tensor],
-    value_caches: List[torch.Tensor],
-    block_mapping: torch.Tensor,
-) -> None:
-    ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
-def swap_blocks(
-    src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
-) -> None:
-    ops.swap_blocks(src, dst, block_mapping)
-
-
-def convert_fp8(
-    output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
-) -> None:
-    ops.convert_fp8(output, input, scale, kv_dtype)
-
-
-__all__ = [
-    "convert_fp8",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "copy_blocks",
-]
diff --git a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py b/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
deleted file mode 100644
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
diff --git a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index d58ac66d476984df8e4c7b15a801ad0b904895ce..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d7ddfc4079b69c3e62274797778d5083c0ebbcc9c8d91a13da296d07ea2b4f2e
-size 88057032
diff --git a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py b/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
deleted file mode 100644
index aa06132e74cd7fb634044a76e528979b02a3559b..0000000000000000000000000000000000000000
--- a/build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import os
-import random
-from abc import ABC, abstractmethod
-from functools import lru_cache, wraps
-from typing import Callable, ParamSpec, TypeVar
-
-import numpy as np
-import torch
-
-IS_ROCM = torch.version.hip is not None
-
-
-class Platform(ABC):
-    @classmethod
-    def seed_everything(cls, seed: int) -> None:
-        """
-        Set the seed of each random module.
-        `torch.manual_seed` will set seed on all devices.
-
-        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-        """
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-    @abstractmethod
-    def get_device_name(self, device_id: int = 0) -> str: ...
-
-    @abstractmethod
-    def is_cuda(self) -> bool: ...
-
-    @abstractmethod
-    def is_rocm(self) -> bool: ...
-
-
-class CudaPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(0)
-
-    def is_cuda(self) -> bool:
-        return True
-
-    def is_rocm(self) -> bool:
-        return False
-
-
-class RocmPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return True
-
-
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
diff --git a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py b/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py
deleted file mode 100644
index 9de56043369487facc1f163df6bd319c9806e5ca..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]
diff --git a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py b/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py
deleted file mode 100644
index a0c0b8db085468dee5100c98d14106a9ee917bf2..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from ._ops import ops
-
-
-# page attention ops
-def paged_attention_v1(
-    out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v1(
-        out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def paged_attention_v2(
-    out: torch.Tensor,
-    exp_sum: torch.Tensor,
-    max_logits: torch.Tensor,
-    tmp_out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v2(
-        out,
-        exp_sum,
-        max_logits,
-        tmp_out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def reshape_and_cache(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-) -> None:
-    ops.reshape_and_cache(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def reshape_and_cache_flash(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: torch.Tensor,
-    v_scale: torch.Tensor,
-) -> None:
-    ops.reshape_and_cache_flash(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def copy_blocks(
-    key_caches: List[torch.Tensor],
-    value_caches: List[torch.Tensor],
-    block_mapping: torch.Tensor,
-) -> None:
-    ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
-def swap_blocks(
-    src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
-) -> None:
-    ops.swap_blocks(src, dst, block_mapping)
-
-
-def convert_fp8(
-    output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
-) -> None:
-    ops.convert_fp8(output, input, scale, kv_dtype)
-
-
-__all__ = [
-    "convert_fp8",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "copy_blocks",
-]
diff --git a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_ops.py b/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_ops.py
deleted file mode 100644
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
diff --git a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 35296121e4dedde54e5e90231cb4d67c35de1ba7..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7b0e57dbf9dbee157784cd0f2ba48bef1f5f33210a9ae13d911d0d8e0e5f0ae1
-size 91208240
diff --git a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py b/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py
deleted file mode 100644
index 9de56043369487facc1f163df6bd319c9806e5ca..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]
diff --git a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py b/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py
deleted file mode 100644
index a0c0b8db085468dee5100c98d14106a9ee917bf2..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from ._ops import ops
-
-
-# page attention ops
-def paged_attention_v1(
-    out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v1(
-        out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def paged_attention_v2(
-    out: torch.Tensor,
-    exp_sum: torch.Tensor,
-    max_logits: torch.Tensor,
-    tmp_out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v2(
-        out,
-        exp_sum,
-        max_logits,
-        tmp_out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def reshape_and_cache(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-) -> None:
-    ops.reshape_and_cache(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def reshape_and_cache_flash(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: torch.Tensor,
-    v_scale: torch.Tensor,
-) -> None:
-    ops.reshape_and_cache_flash(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def copy_blocks(
-    key_caches: List[torch.Tensor],
-    value_caches: List[torch.Tensor],
-    block_mapping: torch.Tensor,
-) -> None:
-    ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
-def swap_blocks(
-    src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
-) -> None:
-    ops.swap_blocks(src, dst, block_mapping)
-
-
-def convert_fp8(
-    output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
-) -> None:
-    ops.convert_fp8(output, input, scale, kv_dtype)
-
-
-__all__ = [
-    "convert_fp8",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "copy_blocks",
-]
diff --git a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py b/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py
deleted file mode 100644
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
diff --git a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index b9925b9b9d75c26e6afd70bb74208205591f724b..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:684a1670bd113fee852580728961adcc53b7adb8e563aa672df223e7bff0c9a6
-size 87913456
diff --git a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py b/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py
deleted file mode 100644
index aa06132e74cd7fb634044a76e528979b02a3559b..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import os
-import random
-from abc import ABC, abstractmethod
-from functools import lru_cache, wraps
-from typing import Callable, ParamSpec, TypeVar
-
-import numpy as np
-import torch
-
-IS_ROCM = torch.version.hip is not None
-
-
-class Platform(ABC):
-    @classmethod
-    def seed_everything(cls, seed: int) -> None:
-        """
-        Set the seed of each random module.
-        `torch.manual_seed` will set seed on all devices.
-
-        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-        """
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-    @abstractmethod
-    def get_device_name(self, device_id: int = 0) -> str: ...
-
-    @abstractmethod
-    def is_cuda(self) -> bool: ...
-
-    @abstractmethod
-    def is_rocm(self) -> bool: ...
-
-
-class CudaPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(0)
-
-    def is_cuda(self) -> bool:
-        return True
-
-    def is_rocm(self) -> bool:
-        return False
-
-
-class RocmPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return True
-
-
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
diff --git a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py b/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py
deleted file mode 100644
index 9de56043369487facc1f163df6bd319c9806e5ca..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]
diff --git a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py b/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py
deleted file mode 100644
index a0c0b8db085468dee5100c98d14106a9ee917bf2..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from ._ops import ops
-
-
-# page attention ops
-def paged_attention_v1(
-    out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v1(
-        out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def paged_attention_v2(
-    out: torch.Tensor,
-    exp_sum: torch.Tensor,
-    max_logits: torch.Tensor,
-    tmp_out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v2(
-        out,
-        exp_sum,
-        max_logits,
-        tmp_out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def reshape_and_cache(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-) -> None:
-    ops.reshape_and_cache(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def reshape_and_cache_flash(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: torch.Tensor,
-    v_scale: torch.Tensor,
-) -> None:
-    ops.reshape_and_cache_flash(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def copy_blocks(
-    key_caches: List[torch.Tensor],
-    value_caches: List[torch.Tensor],
-    block_mapping: torch.Tensor,
-) -> None:
-    ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
-def swap_blocks(
-    src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
-) -> None:
-    ops.swap_blocks(src, dst, block_mapping)
-
-
-def convert_fp8(
-    output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
-) -> None:
-    ops.convert_fp8(output, input, scale, kv_dtype)
-
-
-__all__ = [
-    "convert_fp8",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "copy_blocks",
-]
diff --git a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py b/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py
deleted file mode 100644
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
diff --git a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 0967547fda511cdf0b33750506cb947bee8c10ac..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d4fb3e5dd163dfce3d3032ff1969be0e96eff1568312935c8747beb402d6a2fd
-size 88021624
diff --git a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py b/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py
deleted file mode 100644
index aa06132e74cd7fb634044a76e528979b02a3559b..0000000000000000000000000000000000000000
--- a/build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import os
-import random
-from abc import ABC, abstractmethod
-from functools import lru_cache, wraps
-from typing import Callable, ParamSpec, TypeVar
-
-import numpy as np
-import torch
-
-IS_ROCM = torch.version.hip is not None
-
-
-class Platform(ABC):
-    @classmethod
-    def seed_everything(cls, seed: int) -> None:
-        """
-        Set the seed of each random module.
-        `torch.manual_seed` will set seed on all devices.
-
-        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-        """
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-    @abstractmethod
-    def get_device_name(self, device_id: int = 0) -> str: ...
-
-    @abstractmethod
-    def is_cuda(self) -> bool: ...
-
-    @abstractmethod
-    def is_rocm(self) -> bool: ...
-
-
-class CudaPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(0)
-
-    def is_cuda(self) -> bool:
-        return True
-
-    def is_rocm(self) -> bool:
-        return False
-
-
-class RocmPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return True
-
-
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..552263781d95d35d01fd8c2ae0798549527d5ced
Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cfe7a33504e57c0e583728719de54e97e877af5f
Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..70b023a100bbc9344377d69b765d1731f881b1ff
Binary files /dev/null and b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..451db0c23548212c70d50877ba38b4c6a479107f
--- /dev/null
+++ b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08e178e566aba62bbe98ba07bb0f83784095cb0d2fec7946a8d5773ca8e550ae
+size 91845160
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index d874b4ec16f2b4a2c1bf180fd13a82916c5128c5..0000000000000000000000000000000000000000
--- a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:14b647a190ade84ab061629eb9cfbf7bf104e653c6389609e85e92fa359a828e
-size 91219624
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..32c9d0e881deaa82d4b9b8a62bfcca1b7a4b415c
Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..41099788420022287a62a31c9a94ecf7c50b9547
Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ae0a49c6d1a33b84c622550d099a2a55ab0dc864
Binary files /dev/null and b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..683e4227f6f6118161d7aa77f017ca8669d0d0d5
--- /dev/null
+++ b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bc02dd09d997be7c2d3c996d5716ff269b4e4094b6cab70f4ae73c3763c36aa
+size 88666456
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 0869979223750c00ab68104701677401b18df5a4..0000000000000000000000000000000000000000
--- a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b7faf4c16cf3cae3d4feead6195ece3f0865e602df4b04804e075697b72d5afa
-size 88040824
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7966f507207a15bc57b4637c3973612f222c3ec1
Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b20df8d2dfd9780cddc31133742a6d4d84bebc0a
Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c69ef7d3ff7ee868afff255af248a3d27c513fa1
Binary files /dev/null and b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..57c64eaf0d2415e30608222f19f3ee08788363ce
--- /dev/null
+++ b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5077b6b8fffb349c79738c345b02903f643aa9530d10269ea143e8f3125d10e9
+size 88425448
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 2e069ee70e7521c3bfe222b039a45b37bce07615..0000000000000000000000000000000000000000
--- a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b6b6d748077a864b5f3a9b5973cf453d00a685b70654b5a40630891a86d19230
-size 87803904
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/__init__.py b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__init__.py
similarity index 100%
rename from build/torch25-cxx11-cu118-x86_64-linux/paged_attention/__init__.py
rename to build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__init__.py
diff --git a/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f845aecac7acd96da227c219ba333273b4a8d478
Binary files /dev/null and b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6653f396da33cc12cd837d2ed4d24ae4c3168cfc
Binary files /dev/null and b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..257bc2c6462caf7eec6fe4e43057fabea4de8587
Binary files /dev/null and b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_custom_ops.py
similarity index 100%
rename from build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py
rename to build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_custom_ops.py
diff --git a/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_ops.py b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..56d920dbf5b60b10a3444a94a1035d9e72a0df99
--- /dev/null
+++ b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..31f13481b3d3dc81468ea82fc3002d70f313c0a4
--- /dev/null
+++ b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b4781f634ed1cceabfe9b00e485748ac4e179714ec5e8a3a67475b5a1072fb5
+size 133021344
diff --git a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/platforms.py b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/platforms.py
similarity index 67%
rename from build/torch25-cxx11-cu121-x86_64-linux/paged_attention/platforms.py
rename to build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..74ae7c917695858b14316cfe297f06d556a2f83c
Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a2b987edcaf3deca391b77095bfd95791e6ce83
Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..697d384ad4f801a99899b26227f79128286c37f0
Binary files /dev/null and b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..a84155e9d34f24ccbf02173a98e96343bb56438e
--- /dev/null
+++ b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5ad161ff6226eb3f697c3fcec6051c70ca5bc0a66332f927a3cc3ecb39c34dd
+size 91821840
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index e33749e59aeec571ee057c4fb1edc2f4520669ef..0000000000000000000000000000000000000000
--- a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f36fdfd9ba38856a37aa65cec932052a83dbc6b194d1430333ba07144e3b1552
-size 91192360
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/platforms.py b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch26-cxx98-cu118-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bab534f86e8f1a8efb4b4fdb047b4d5fcdc421eb
Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..765bc670ab5469ddd17eb1446a4b66fabc2225d5
Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6f2f0bd6e3c9efcb39a84af2b74b3b2751f29db8
Binary files /dev/null and b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..7dc22c44a910d4ef938effe0bde68289f627886c
--- /dev/null
+++ b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dc095cafd06c184ecb8e2268bf1a4dbded38098c84880bdd9beb87a856a553f
+size 88631224
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 4fd10382f47723eacbbaa7466a0ab64b25019de6..0000000000000000000000000000000000000000
--- a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4ad4d0b58ba16d3d29bf036d39141cd88e734044d2a74bfdc220fac232763e17
-size 88009784
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..86adb2c008ed0396c83d39b83a3df64479afee7f
Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2ecca74d686eae4c004b3f179b1be558cb3796e9
Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dbd7935a0ff33223d9849d3907c8026a18b88214
Binary files /dev/null and b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..17d65fda6d18a56ca70a37150b1cb60012cb68d7
--- /dev/null
+++ b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe20bc582f45af4d0819856e77a7aa31a7d7ef4f821e362e7972a4e8bb6c2eba
+size 88390208
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index 61a53367752353d665e55ddef70ec42ef858e952..0000000000000000000000000000000000000000
--- a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f805235d5c7b25e4f942c86aec3d32424172e447c0773c5216d0e2e7226f8eac
-size 87768776
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..70c6994c7462eaf4b46ea7a83ace57daeab52fb1
Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..731970eb28e385f3e9f247f81f62632cca0d2495
Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..53249ceb7dff4629772aad669fa0736f6d76af11
Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..ef90bd1abefd47d7a65d18855b4abe933604a779
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7e9598cbcf88d836c2cbf737dafc4513e373b81f383acfce0aa74227600a166
+size 91845296
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index c83500645e392588a7b8b1b80d39c6a4aac0d33f..0000000000000000000000000000000000000000
--- a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a2ddccf6fced71b8079bb8e9a6e12f063294110fa86585bed759e65f4717c1da
-size 91219752
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/platforms.py b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..562bf3258683ad6cc595aa771f3ba0772b909e1f
Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0614a9070b49b5ba96f839d60329b733c422f48c
Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e6fd366b9e45a56263407a972e98b1ffc5e2303e
Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..7eb73d0955616254c8a8614ebd96d909c4078b77
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e14b5caf109855ba826adef443fc4568a170ba2bbd1738b7bbdcec5e43f38cfd
+size 88425480
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index e088068ce1f1ce65f387a21e5f80d4a8466df649..0000000000000000000000000000000000000000
--- a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:51a28a082905ac540fb6bb9ffeddb9786b92316150467b850aee02ae7db4efa2
-size 87803936
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/platforms.py b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..702e7abb792e16df32b113a1305f8e38eedf536f
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5ad287ac40a8bc17b688a8e9c94b96bcf9302839
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..523a67b87a2fb452b488bb3d4fc4f5529a036561
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_ops.py
index a0c7ecd509f9dc04c72c92c1c5389a8e04100860..56d920dbf5b60b10a3444a94a1035d9e72a0df99 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_ops.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
\ No newline at end of file
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..fcaa5bfcae4da9cb0dd496b3e539a8aa66c49731
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5747d9347ecab35a021f746cbb37b7498c8e6e29558071c5845cf2ccc613aa61
+size 120461168
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
deleted file mode 100755
index c0d6b36bf3dc8edfbf8891374753e7a66c26b98a..0000000000000000000000000000000000000000
--- a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e24aefa6e49127a36361514b5349572a8bf1c48803892e0dd9fcd9bfd8808f77
-size 119418080
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/platforms.py b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
diff --git a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/__init__.py b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__init__.py
similarity index 100%
rename from build/torch25-cxx11-cu121-x86_64-linux/paged_attention/__init__.py
rename to build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__init__.py
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a76b5a7833d5bb073bf304e511c99a89cfe195a0
Binary files /dev/null and b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8872c0d27f9f5d0de6c7d51782a8d5066523ab7a
Binary files /dev/null and b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc differ
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cd4e26d7f74f774767789700960d43827a9d3e8d
Binary files /dev/null and b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc differ
diff --git a/build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_custom_ops.py b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_custom_ops.py
similarity index 100%
rename from build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_custom_ops.py
rename to build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_custom_ops.py
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_ops.py b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..56d920dbf5b60b10a3444a94a1035d9e72a0df99
--- /dev/null
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_paged_attention_6677800::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..af88a817b2d5bf4ade32920c6dd1af1a79cbf770
--- /dev/null
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2905e631976eeeac3816fc1907f9c23818151ecb7e54d3fece7a42e0bcf10553
+size 120280352
diff --git a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/platforms.py b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/platforms.py
similarity index 67%
rename from build/torch25-cxx98-cu118-x86_64-linux/paged_attention/platforms.py
rename to build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/platforms.py
index aa06132e74cd7fb634044a76e528979b02a3559b..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 100644
--- a/build/torch25-cxx98-cu118-x86_64-linux/paged_attention/platforms.py
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/paged_attention/platforms.py
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)