Build (x86_64-linux)
This view is limited to 50 files because it contains too many changes.
- build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py +0 -21
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py +0 -173
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py +0 -9
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py +0 -62
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py +3 -3
- build/{torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py +31 -1
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py +3 -3
- build/{torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py +31 -1
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py +3 -3
- build/{torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
- build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py +31 -1
- build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/__init__.py +0 -0
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
- build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_custom_ops.py +0 -0
- build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_ops.py +3 -3
- build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +3 -0
- build/{torch25-cxx11-cu121-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/platforms.py +31 -1
- build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
-import os
-import random
-from abc import ABC, abstractmethod
-from functools import lru_cache, wraps
-from typing import Callable, ParamSpec, TypeVar
-
-import numpy as np
-import torch
-
-IS_ROCM = torch.version.hip is not None
-
-
-class Platform(ABC):
-    @classmethod
-    def seed_everything(cls, seed: int) -> None:
-        """
-        Set the seed of each random module.
-        `torch.manual_seed` will set seed on all devices.
-
-        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-        """
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-    @abstractmethod
-    def get_device_name(self, device_id: int = 0) -> str: ...
-
-    @abstractmethod
-    def is_cuda(self) -> bool: ...
-
-    @abstractmethod
-    def is_rocm(self) -> bool: ...
-
-
-class CudaPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(0)
-
-    def is_cuda(self) -> bool:
-        return True
-
-    def is_rocm(self) -> bool:
-        return False
-
-
-class RocmPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return True
-
-
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
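For orientation, every build variant shipped an identical copy of this module, and downstream code talks to it only through the `current_platform` singleton it defines. A minimal usage sketch (assuming the wheel is importable as `paged_attention`, this repo's package name):

    from paged_attention.platforms import current_platform

    current_platform.seed_everything(42)       # seeds random, numpy and torch
    print(current_platform.get_device_name())  # device 0 name on CUDA/ROCm builds
    if current_platform.is_rocm():
        print("running on a ROCm build")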
build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"
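`add_op_namespace_prefix` only builds the fully qualified name of an operator registered by the bundled native extension; the hashed module name is otherwise hidden behind the `ops` alias. An illustrative sketch (the `copy_blocks` op name comes from `_custom_ops.py` below):

    from paged_attention._ops import ops, add_op_namespace_prefix

    qualified = add_op_namespace_prefix("copy_blocks")
    # for this (old) build: "_paged_attention_daf6221::copy_blocks"
    print(qualified)
    # the same operator is reachable as an attribute of the ops namespace
    print(ops.copy_blocks)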
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]
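Because the package `__init__` re-exports the wrappers listed in `__all__`, callers import them from the top level rather than from `_custom_ops`; for example (illustrative only):

    from paged_attention import paged_attention_v1, reshape_and_cache, copy_blocks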
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from ._ops import ops
-
-
-# page attention ops
-def paged_attention_v1(
-    out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v1(
-        out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def paged_attention_v2(
-    out: torch.Tensor,
-    exp_sum: torch.Tensor,
-    max_logits: torch.Tensor,
-    tmp_out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v2(
-        out,
-        exp_sum,
-        max_logits,
-        tmp_out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def reshape_and_cache(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-) -> None:
-    ops.reshape_and_cache(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def reshape_and_cache_flash(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: torch.Tensor,
-    v_scale: torch.Tensor,
-) -> None:
-    ops.reshape_and_cache_flash(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def copy_blocks(
-    key_caches: List[torch.Tensor],
-    value_caches: List[torch.Tensor],
-    block_mapping: torch.Tensor,
-) -> None:
-    ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
-def swap_blocks(
-    src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
-) -> None:
-    ops.swap_blocks(src, dst, block_mapping)
-
-
-def convert_fp8(
-    output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
-) -> None:
-    ops.convert_fp8(output, input, scale, kv_dtype)
-
-
-__all__ = [
-    "convert_fp8",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "copy_blocks",
-]
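Each wrapper above forwards its arguments unchanged to the corresponding op in the compiled extension. A hedged sketch of the cache-management path; the tensor shapes are hypothetical, chosen only for illustration, since the exact KV-cache layout expected by the kernels is not documented in this diff:

    import torch
    from paged_attention import copy_blocks

    # hypothetical layout: one layer, 4 cache blocks (shapes are illustrative only)
    key_caches = [torch.zeros(4, 8, 128, 16, device="cuda", dtype=torch.float16)]
    value_caches = [torch.zeros(4, 8, 128, 16, device="cuda", dtype=torch.float16)]
    # copy block 0 into block 1 within each cache
    block_mapping = torch.tensor([[0, 1]], device="cuda", dtype=torch.int64)
    copy_blocks(key_caches, value_caches, block_mapping)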
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"

build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
(content identical to the torch25-cxx11-cu118 platforms.py removed above; 62 lines deleted)
build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]

build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
(content identical to the torch25-cxx11-cu124 _custom_ops.py removed above; 173 lines deleted)
build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
(content identical to the torch25-cxx11-cu124 _custom_ops.py removed above; 173 lines deleted)

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:684a1670bd113fee852580728961adcc53b7adb8e563aa672df223e7bff0c9a6
-size 87913456

build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
(content identical to the torch25-cxx11-cu118 platforms.py removed above; 62 lines deleted)
build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py
DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py
DELETED
@@ -1,173 +0,0 @@
(content identical to the torch25-cxx11-cu124 _custom_ops.py removed above; 173 lines deleted)

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py
DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_daf6221::{op_name}"

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d4fb3e5dd163dfce3d3032ff1969be0e96eff1568312935c8747beb402d6a2fd
-size 88021624

build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py
DELETED
@@ -1,62 +0,0 @@
(content identical to the torch25-cxx11-cu118 platforms.py removed above; 62 lines deleted)
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"
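The only change here is the build hash baked into the native module name (daf6221 becomes 6677800); wrapper code that goes through the `ops` alias and `add_op_namespace_prefix` is unaffected. An illustrative check:

    from paged_attention._ops import ops, add_op_namespace_prefix

    # resolves against whichever hashed extension this build installed
    assert add_op_namespace_prefix("paged_attention_v1").endswith("::paged_attention_v1")
    print(ops)  # the op namespace object for the currently installed build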
build/{torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:08e178e566aba62bbe98ba07bb0f83784095cb0d2fec7946a8d5773ca8e550ae
+size 91845160

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:14b647a190ade84ab061629eb9cfbf7bf104e653c6389609e85e92fa359a828e
-size 91219624
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py
CHANGED
@@ -8,6 +8,7 @@ import numpy as np
 import torch
 
 IS_ROCM = torch.version.hip is not None
+IS_MPS = torch.backends.mps.is_available()
 
 
 class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
     @abstractmethod
     def is_rocm(self) -> bool: ...
 
+    @abstractmethod
+    def is_mps(self) -> bool: ...
+
 
 class CudaPlatform(Platform):
     @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
     def is_rocm(self) -> bool:
         return False
 
+    def is_mps(self) -> bool:
+        return False
+
 
 class RocmPlatform(Platform):
     @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
     def is_rocm(self) -> bool:
         return True
 
+    def is_mps(self) -> bool:
+        return False
+
+
+class MpsPlatform(Platform):
+    @classmethod
+    @lru_cache(maxsize=8)
+    def get_device_name(cls, device_id: int = 0) -> str:
+        return torch.cuda.get_device_name(device_id)
+
+    def is_cuda(self) -> bool:
+        return False
+
+    def is_rocm(self) -> bool:
+        return False
+
+    def is_mps(self) -> bool:
+        return True
 
-current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+current_platform = (
+    RocmPlatform() if IS_ROCM else
+    MpsPlatform() if IS_MPS else
+    CudaPlatform() if torch.cuda.is_available() else
+    None
+)
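With this change `current_platform` can be `None` when neither ROCm, MPS nor CUDA is available, so callers now have to guard for that case. A minimal consumption sketch (import path as used elsewhere in this repo):

    from paged_attention.platforms import current_platform

    if current_platform is None:
        raise RuntimeError("no supported accelerator detected")
    if current_platform.is_mps():
        device = "mps"
    else:
        device = "cuda"  # ROCm builds of PyTorch also expose the "cuda" device string
    print(device, current_platform.get_device_name())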
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"

build/{torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1bc02dd09d997be7c2d3c996d5716ff269b4e4094b6cab70f4ae73c3763c36aa
+size 88666456

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b7faf4c16cf3cae3d4feead6195ece3f0865e602df4b04804e075697b72d5afa
-size 88040824

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py
CHANGED
(same change as the torch26-cxx11-cu118 platforms.py above: adds IS_MPS, the is_mps methods, the MpsPlatform class, and the new current_platform selection; +31 -1)
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"

build/{torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5077b6b8fffb349c79738c345b02903f643aa9530d10269ea143e8f3125d10e9
+size 88425448

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b6b6d748077a864b5f3a9b5973cf453d00a685b70654b5a40630891a86d19230
-size 87803904

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py
CHANGED
(same change as the torch26-cxx11-cu118 platforms.py above: adds IS_MPS, the is_mps methods, the MpsPlatform class, and the new current_platform selection; +31 -1)
build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/__init__.py
RENAMED
File without changes

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 510 Bytes)

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.71 kB)

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 548 Bytes)

build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_custom_ops.py
RENAMED
File without changes

build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_ops.py
RENAMED
@@ -1,9 +1,9 @@
 import torch
-from . import _paged_attention_daf6221
-ops = torch.ops._paged_attention_daf6221
+from . import _paged_attention_6677800
+ops = torch.ops._paged_attention_6677800
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_paged_attention_daf6221::{op_name}"
+    return f"_paged_attention_6677800::{op_name}"

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b4781f634ed1cceabfe9b00e485748ac4e179714ec5e8a3a67475b5a1072fb5
+size 133021344

build/{torch25-cxx11-cu121-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/platforms.py
RENAMED
(same change as the torch26-cxx11-cu118 platforms.py above: adds IS_MPS, the is_mps methods, the MpsPlatform class, and the new current_platform selection; +31 -1)

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc
ADDED (binary file, 509 Bytes)

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc
ADDED (binary file, 4.7 kB)

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc
ADDED (binary file, 547 Bytes)