Build (x86_64-linux)
This view is limited to 50 files because it contains too many changes.
- build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py +3 -3
- build/{torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py +31 -1
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py +3 -3
- build/{torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py +31 -1
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py +3 -3
- build/{torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py +31 -1
- build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/__init__.py +0 -0
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_custom_ops.py +0 -0
- build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_ops.py +3 -3
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +3 -0
- build/{torch25-cxx11-cu121-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/platforms.py +31 -1
- build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
-import os
-import random
-from abc import ABC, abstractmethod
-from functools import lru_cache, wraps
-from typing import Callable, ParamSpec, TypeVar
-
-import numpy as np
-import torch
-
-IS_ROCM = torch.version.hip is not None
-
-
-class Platform(ABC):
-    @classmethod
-    def seed_everything(cls, seed: int) -> None:
-        """
-        Set the seed of each random module.
-        `torch.manual_seed` will set seed on all devices.
-
-        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-        """
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-    @abstractmethod
-    def get_device_name(self, device_id: int = 0) -> str: ...
-
-    @abstractmethod
-    def is_cuda(self) -> bool: ...
-
-    @abstractmethod
-    def is_rocm(self) -> bool: ...
-
-
-class CudaPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(0)
-
-    def is_cuda(self) -> bool:
-        return True
-
-    def is_rocm(self) -> bool:
-        return False
-
-
-class RocmPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return True
-
-
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
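For orientation, every build variant shipped an identical copy of this module, and downstream code talks to it only through the `current_platform` singleton it defines. A minimal usage sketch (assuming the wheel is importable as `paged_attention`, this repo's package name):

    from paged_attention.platforms import current_platform

    current_platform.seed_everything(42)       # seeds random, numpy and torch
    print(current_platform.get_device_name())  # device 0 name on CUDA/ROCm builds
    if current_platform.is_rocm():
        print("running on a ROCm build")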
build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
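`add_op_namespace_prefix` only builds the fully qualified name of an operator registered by the bundled native extension; the hashed module name is otherwise hidden behind the `ops` alias. An illustrative sketch (the `copy_blocks` op name comes from `_custom_ops.py` below):

    from paged_attention._ops import ops, add_op_namespace_prefix

    qualified = add_op_namespace_prefix("copy_blocks")
    # for this (old) build: "_paged_attention_daf6221::copy_blocks"
    print(qualified)
    # the same operator is reachable as an attribute of the ops namespace
    print(ops.copy_blocks)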
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]
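Because the package `__init__` re-exports the wrappers listed in `__all__`, callers import them from the top level rather than from `_custom_ops`; for example (illustrative only):

    from paged_attention import paged_attention_v1, reshape_and_cache, copy_blocks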
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from ._ops import ops
-
-
-# page attention ops
-def paged_attention_v1(
-    out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v1(
-        out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def paged_attention_v2(
-    out: torch.Tensor,
-    exp_sum: torch.Tensor,
-    max_logits: torch.Tensor,
-    tmp_out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v2(
-        out,
-        exp_sum,
-        max_logits,
-        tmp_out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def reshape_and_cache(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-) -> None:
-    ops.reshape_and_cache(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def reshape_and_cache_flash(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: torch.Tensor,
-    v_scale: torch.Tensor,
-) -> None:
-    ops.reshape_and_cache_flash(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def copy_blocks(
-    key_caches: List[torch.Tensor],
-    value_caches: List[torch.Tensor],
-    block_mapping: torch.Tensor,
-) -> None:
-    ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
-def swap_blocks(
-    src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
-) -> None:
-    ops.swap_blocks(src, dst, block_mapping)
-
-
-def convert_fp8(
-    output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
-) -> None:
-    ops.convert_fp8(output, input, scale, kv_dtype)
-
-
-__all__ = [
-    "convert_fp8",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "copy_blocks",
-]
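Each wrapper above forwards its arguments unchanged to the corresponding op in the compiled extension. A hedged sketch of the cache-management path; the tensor shapes are hypothetical, chosen only for illustration, since the exact KV-cache layout expected by the kernels is not documented in this diff:

    import torch
    from paged_attention import copy_blocks

    # hypothetical layout: one layer, 4 cache blocks (shapes are illustrative only)
    key_caches = [torch.zeros(4, 8, 128, 16, device="cuda", dtype=torch.float16)]
    value_caches = [torch.zeros(4, 8, 128, 16, device="cuda", dtype=torch.float16)]
    # copy block 0 into block 1 within each cache
    block_mapping = torch.tensor([[0, 1]], device="cuda", dtype=torch.int64)
    copy_blocks(key_caches, value_caches, block_mapping)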
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"

build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
(content identical to the torch25-cxx11-cu118 platforms.py removed above; 62 lines deleted)
build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]

build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
(content identical to the torch25-cxx11-cu124 _custom_ops.py removed above; 173 lines deleted)
build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
(content identical to the torch25-cxx11-cu124 _custom_ops.py removed above; 173 lines deleted)

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:684a1670bd113fee852580728961adcc53b7adb8e563aa672df223e7bff0c9a6
-size 87913456

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
(content identical to the torch25-cxx11-cu118 platforms.py removed above; 62 lines deleted)
build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
(content identical to the torch25-cxx11-cu124 _custom_ops.py removed above; 173 lines deleted)

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d4fb3e5dd163dfce3d3032ff1969be0e96eff1568312935c8747beb402d6a2fd
-size 88021624

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
(content identical to the torch25-cxx11-cu118 platforms.py removed above; 62 lines deleted)
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"
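The only change here is the build hash baked into the native module name (daf6221 becomes 6677800); wrapper code that goes through the `ops` alias and `add_op_namespace_prefix` is unaffected. An illustrative check:

    from paged_attention._ops import ops, add_op_namespace_prefix

    # resolves against whichever hashed extension this build installed
    assert add_op_namespace_prefix("paged_attention_v1").endswith("::paged_attention_v1")
    print(ops)  # the op namespace object for the currently installed build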
build/{torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:08e178e566aba62bbe98ba07bb0f83784095cb0d2fec7946a8d5773ca8e550ae
+size 91845160

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:14b647a190ade84ab061629eb9cfbf7bf104e653c6389609e85e92fa359a828e
-size 91219624
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
CHANGED
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
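With this change `current_platform` can be `None` when neither ROCm, MPS nor CUDA is available, so callers now have to guard for that case. A minimal consumption sketch (import path as used elsewhere in this repo):

    from paged_attention.platforms import current_platform

    if current_platform is None:
        raise RuntimeError("no supported accelerator detected")
    if current_platform.is_mps():
        device = "mps"
    else:
        device = "cuda"  # ROCm builds of PyTorch also expose the "cuda" device string
    print(device, current_platform.get_device_name())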
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"

build/{torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1bc02dd09d997be7c2d3c996d5716ff269b4e4094b6cab70f4ae73c3763c36aa
+size 88666456

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b7faf4c16cf3cae3d4feead6195ece3f0865e602df4b04804e075697b72d5afa
-size 88040824

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
CHANGED
(same change as the torch26-cxx11-cu118 platforms.py above: adds IS_MPS, the is_mps methods, the MpsPlatform class, and the new current_platform selection; +31 -1)
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"

build/{torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5077b6b8fffb349c79738c345b02903f643aa9530d10269ea143e8f3125d10e9
+size 88425448

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b6b6d748077a864b5f3a9b5973cf453d00a685b70654b5a40630891a86d19230
-size 87803904

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
CHANGED
(same change as the torch26-cxx11-cu118 platforms.py above: adds IS_MPS, the is_mps methods, the MpsPlatform class, and the new current_platform selection; +31 -1)
build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/__init__.py
RENAMED
File without changes

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 510 Bytes)

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.71 kB)

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 548 Bytes)

build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_custom_ops.py
RENAMED
File without changes

build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_ops.py
RENAMED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b4781f634ed1cceabfe9b00e485748ac4e179714ec5e8a3a67475b5a1072fb5
+size 133021344

build/{torch25-cxx11-cu121-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/platforms.py
RENAMED
(same change as the torch26-cxx11-cu118 platforms.py above: adds IS_MPS, the is_mps methods, the MpsPlatform class, and the new current_platform selection; +31 -1)

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)