danieldk (HF Staff) committed
Commit 8b32f11 · Parent(s): e6ce28c

Build (x86_64-linux)

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete change set.
Files changed (50):
  1. build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py +0 -62
  2. build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py +0 -9
  3. build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py +0 -21
  4. build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py +0 -173
  5. build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py +0 -9
  6. build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py +0 -62
  7. build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py +0 -21
  8. build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py +0 -173
  9. build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py +0 -21
  10. build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py +0 -173
  11. build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py +0 -9
  12. build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
  13. build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py +0 -62
  14. build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py +0 -21
  15. build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py +0 -173
  16. build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py +0 -9
  17. build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
  18. build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py +0 -62
  19. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  20. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  21. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  22. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py +3 -3
  23. build/{torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
  24. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
  25. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py +31 -1
  26. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  27. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  28. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  29. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py +3 -3
  30. build/{torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
  31. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
  32. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py +31 -1
  33. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  34. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  35. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  36. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py +3 -3
  37. build/{torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} +2 -2
  38. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so +0 -3
  39. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py +31 -1
  40. build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/__init__.py +0 -0
  41. build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  42. build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  43. build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  44. build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_custom_ops.py +0 -0
  45. build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_ops.py +3 -3
  46. build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +3 -0
  47. build/{torch25-cxx11-cu121-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/platforms.py +31 -1
  48. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  49. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  50. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py DELETED
@@ -1,62 +0,0 @@
- import os
- import random
- from abc import ABC, abstractmethod
- from functools import lru_cache, wraps
- from typing import Callable, ParamSpec, TypeVar
-
- import numpy as np
- import torch
-
- IS_ROCM = torch.version.hip is not None
-
-
- class Platform(ABC):
-     @classmethod
-     def seed_everything(cls, seed: int) -> None:
-         """
-         Set the seed of each random module.
-         `torch.manual_seed` will set seed on all devices.
-
-         Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-         """
-         random.seed(seed)
-         np.random.seed(seed)
-         torch.manual_seed(seed)
-
-     @abstractmethod
-     def get_device_name(self, device_id: int = 0) -> str: ...
-
-     @abstractmethod
-     def is_cuda(self) -> bool: ...
-
-     @abstractmethod
-     def is_rocm(self) -> bool: ...
-
-
- class CudaPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(0)
-
-     def is_cuda(self) -> bool:
-         return True
-
-     def is_rocm(self) -> bool:
-         return False
-
-
- class RocmPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(device_id)
-
-     def is_cuda(self) -> bool:
-         return False
-
-     def is_rocm(self) -> bool:
-         return True
-
-
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
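For reference, a minimal usage sketch of the platform helpers being deleted here (the same module is re-added under the torch26 build directories later in this diff). It assumes the package is importable as `paged_attention` and that a CUDA or ROCm device is present; nothing else is taken from this commit.

    # Hedged sketch only; import path and device availability are assumptions.
    from paged_attention.platforms import current_platform

    current_platform.seed_everything(0)  # seeds random, numpy and torch in one call
    if current_platform.is_rocm():
        print("ROCm device:", current_platform.get_device_name())
    elif current_platform.is_cuda():
        print("CUDA device:", current_platform.get_device_name(0))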
build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
- import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
-
- def add_op_namespace_prefix(op_name: str):
-     """
-     Prefix op by namespace.
-     """
-     return f"_paged_attention_daf6221::{op_name}"
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
- from ._custom_ops import (
-     convert_fp8,
-     copy_blocks,
-     paged_attention_v1,
-     paged_attention_v2,
-     reshape_and_cache,
-     reshape_and_cache_flash,
-     swap_blocks,
- )
- from ._ops import ops
-
- __all__ = [
-     "convert_fp8",
-     "copy_blocks",
-     "ops",
-     "paged_attention_v1",
-     "paged_attention_v2",
-     "reshape_and_cache",
-     "reshape_and_cache_flash",
-     "swap_blocks",
- ]
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
- from typing import List, Optional
-
- import torch
-
- from ._ops import ops
-
-
- # page attention ops
- def paged_attention_v1(
-     out: torch.Tensor,
-     query: torch.Tensor,
-     key_cache: torch.Tensor,
-     value_cache: torch.Tensor,
-     num_kv_heads: int,
-     scale: float,
-     block_tables: torch.Tensor,
-     seq_lens: torch.Tensor,
-     block_size: int,
-     max_seq_len: int,
-     alibi_slopes: Optional[torch.Tensor],
-     kv_cache_dtype: str,
-     k_scale: float,
-     v_scale: float,
-     tp_rank: int = 0,
-     blocksparse_local_blocks: int = 0,
-     blocksparse_vert_stride: int = 0,
-     blocksparse_block_size: int = 64,
-     blocksparse_head_sliding_step: int = 0,
- ) -> None:
-     ops.paged_attention_v1(
-         out,
-         query,
-         key_cache,
-         value_cache,
-         num_kv_heads,
-         scale,
-         block_tables,
-         seq_lens,
-         block_size,
-         max_seq_len,
-         alibi_slopes,
-         kv_cache_dtype,
-         k_scale,
-         v_scale,
-         tp_rank,
-         blocksparse_local_blocks,
-         blocksparse_vert_stride,
-         blocksparse_block_size,
-         blocksparse_head_sliding_step,
-     )
-
-
- def paged_attention_v2(
-     out: torch.Tensor,
-     exp_sum: torch.Tensor,
-     max_logits: torch.Tensor,
-     tmp_out: torch.Tensor,
-     query: torch.Tensor,
-     key_cache: torch.Tensor,
-     value_cache: torch.Tensor,
-     num_kv_heads: int,
-     scale: float,
-     block_tables: torch.Tensor,
-     seq_lens: torch.Tensor,
-     block_size: int,
-     max_seq_len: int,
-     alibi_slopes: Optional[torch.Tensor],
-     kv_cache_dtype: str,
-     k_scale: float,
-     v_scale: float,
-     tp_rank: int = 0,
-     blocksparse_local_blocks: int = 0,
-     blocksparse_vert_stride: int = 0,
-     blocksparse_block_size: int = 64,
-     blocksparse_head_sliding_step: int = 0,
- ) -> None:
-     ops.paged_attention_v2(
-         out,
-         exp_sum,
-         max_logits,
-         tmp_out,
-         query,
-         key_cache,
-         value_cache,
-         num_kv_heads,
-         scale,
-         block_tables,
-         seq_lens,
-         block_size,
-         max_seq_len,
-         alibi_slopes,
-         kv_cache_dtype,
-         k_scale,
-         v_scale,
-         tp_rank,
-         blocksparse_local_blocks,
-         blocksparse_vert_stride,
-         blocksparse_block_size,
-         blocksparse_head_sliding_step,
-     )
-
-
- def reshape_and_cache(
-     key: torch.Tensor,
-     value: torch.Tensor,
-     key_cache: torch.Tensor,
-     value_cache: torch.Tensor,
-     slot_mapping: torch.Tensor,
-     kv_cache_dtype: str,
-     k_scale: float,
-     v_scale: float,
- ) -> None:
-     ops.reshape_and_cache(
-         key,
-         value,
-         key_cache,
-         value_cache,
-         slot_mapping,
-         kv_cache_dtype,
-         k_scale,
-         v_scale,
-     )
-
-
- def reshape_and_cache_flash(
-     key: torch.Tensor,
-     value: torch.Tensor,
-     key_cache: torch.Tensor,
-     value_cache: torch.Tensor,
-     slot_mapping: torch.Tensor,
-     kv_cache_dtype: str,
-     k_scale: torch.Tensor,
-     v_scale: torch.Tensor,
- ) -> None:
-     ops.reshape_and_cache_flash(
-         key,
-         value,
-         key_cache,
-         value_cache,
-         slot_mapping,
-         kv_cache_dtype,
-         k_scale,
-         v_scale,
-     )
-
-
- def copy_blocks(
-     key_caches: List[torch.Tensor],
-     value_caches: List[torch.Tensor],
-     block_mapping: torch.Tensor,
- ) -> None:
-     ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
- def swap_blocks(
-     src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
- ) -> None:
-     ops.swap_blocks(src, dst, block_mapping)
-
-
- def convert_fp8(
-     output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
- ) -> None:
-     ops.convert_fp8(output, input, scale, kv_dtype)
-
-
- __all__ = [
-     "convert_fp8",
-     "paged_attention_v1",
-     "paged_attention_v2",
-     "reshape_and_cache",
-     "copy_blocks",
- ]
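The wrappers above forward straight to the compiled extension through `ops`. As a rough illustration of the call surface, a hedged sketch of `swap_blocks` follows; the block-first tensor layout, the device placement, and the mapping dtype are assumptions in the style of vLLM-like paged-attention kernels, not something this commit documents.

    # Hedged sketch: copies whole KV-cache blocks (dim 0) from src to dst per mapping.
    import torch
    from paged_attention import swap_blocks

    src = torch.randn(4, 16, 8, 64, device="cuda")   # [num_blocks, ...] layout assumed
    dst = torch.zeros_like(src)
    mapping = torch.tensor([[0, 1], [2, 3]], dtype=torch.int64, device="cuda")
    swap_blocks(src, dst, mapping)                    # src block 0 -> dst block 1, block 2 -> block 3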
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
- import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
-
- def add_op_namespace_prefix(op_name: str):
-     """
-     Prefix op by namespace.
-     """
-     return f"_paged_attention_daf6221::{op_name}"
build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py DELETED
@@ -1,62 +0,0 @@
- import os
- import random
- from abc import ABC, abstractmethod
- from functools import lru_cache, wraps
- from typing import Callable, ParamSpec, TypeVar
-
- import numpy as np
- import torch
-
- IS_ROCM = torch.version.hip is not None
-
-
- class Platform(ABC):
-     @classmethod
-     def seed_everything(cls, seed: int) -> None:
-         """
-         Set the seed of each random module.
-         `torch.manual_seed` will set seed on all devices.
-
-         Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-         """
-         random.seed(seed)
-         np.random.seed(seed)
-         torch.manual_seed(seed)
-
-     @abstractmethod
-     def get_device_name(self, device_id: int = 0) -> str: ...
-
-     @abstractmethod
-     def is_cuda(self) -> bool: ...
-
-     @abstractmethod
-     def is_rocm(self) -> bool: ...
-
-
- class CudaPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(0)
-
-     def is_cuda(self) -> bool:
-         return True
-
-     def is_rocm(self) -> bool:
-         return False
-
-
- class RocmPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(device_id)
-
-     def is_cuda(self) -> bool:
-         return False
-
-     def is_rocm(self) -> bool:
-         return True
-
-
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
- from ._custom_ops import (
-     convert_fp8,
-     copy_blocks,
-     paged_attention_v1,
-     paged_attention_v2,
-     reshape_and_cache,
-     reshape_and_cache_flash,
-     swap_blocks,
- )
- from ._ops import ops
-
- __all__ = [
-     "convert_fp8",
-     "copy_blocks",
-     "ops",
-     "paged_attention_v1",
-     "paged_attention_v2",
-     "reshape_and_cache",
-     "reshape_and_cache_flash",
-     "swap_blocks",
- ]
build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # page attention ops
9
- def paged_attention_v1(
10
- out: torch.Tensor,
11
- query: torch.Tensor,
12
- key_cache: torch.Tensor,
13
- value_cache: torch.Tensor,
14
- num_kv_heads: int,
15
- scale: float,
16
- block_tables: torch.Tensor,
17
- seq_lens: torch.Tensor,
18
- block_size: int,
19
- max_seq_len: int,
20
- alibi_slopes: Optional[torch.Tensor],
21
- kv_cache_dtype: str,
22
- k_scale: float,
23
- v_scale: float,
24
- tp_rank: int = 0,
25
- blocksparse_local_blocks: int = 0,
26
- blocksparse_vert_stride: int = 0,
27
- blocksparse_block_size: int = 64,
28
- blocksparse_head_sliding_step: int = 0,
29
- ) -> None:
30
- ops.paged_attention_v1(
31
- out,
32
- query,
33
- key_cache,
34
- value_cache,
35
- num_kv_heads,
36
- scale,
37
- block_tables,
38
- seq_lens,
39
- block_size,
40
- max_seq_len,
41
- alibi_slopes,
42
- kv_cache_dtype,
43
- k_scale,
44
- v_scale,
45
- tp_rank,
46
- blocksparse_local_blocks,
47
- blocksparse_vert_stride,
48
- blocksparse_block_size,
49
- blocksparse_head_sliding_step,
50
- )
51
-
52
-
53
- def paged_attention_v2(
54
- out: torch.Tensor,
55
- exp_sum: torch.Tensor,
56
- max_logits: torch.Tensor,
57
- tmp_out: torch.Tensor,
58
- query: torch.Tensor,
59
- key_cache: torch.Tensor,
60
- value_cache: torch.Tensor,
61
- num_kv_heads: int,
62
- scale: float,
63
- block_tables: torch.Tensor,
64
- seq_lens: torch.Tensor,
65
- block_size: int,
66
- max_seq_len: int,
67
- alibi_slopes: Optional[torch.Tensor],
68
- kv_cache_dtype: str,
69
- k_scale: float,
70
- v_scale: float,
71
- tp_rank: int = 0,
72
- blocksparse_local_blocks: int = 0,
73
- blocksparse_vert_stride: int = 0,
74
- blocksparse_block_size: int = 64,
75
- blocksparse_head_sliding_step: int = 0,
76
- ) -> None:
77
- ops.paged_attention_v2(
78
- out,
79
- exp_sum,
80
- max_logits,
81
- tmp_out,
82
- query,
83
- key_cache,
84
- value_cache,
85
- num_kv_heads,
86
- scale,
87
- block_tables,
88
- seq_lens,
89
- block_size,
90
- max_seq_len,
91
- alibi_slopes,
92
- kv_cache_dtype,
93
- k_scale,
94
- v_scale,
95
- tp_rank,
96
- blocksparse_local_blocks,
97
- blocksparse_vert_stride,
98
- blocksparse_block_size,
99
- blocksparse_head_sliding_step,
100
- )
101
-
102
-
103
- def reshape_and_cache(
104
- key: torch.Tensor,
105
- value: torch.Tensor,
106
- key_cache: torch.Tensor,
107
- value_cache: torch.Tensor,
108
- slot_mapping: torch.Tensor,
109
- kv_cache_dtype: str,
110
- k_scale: float,
111
- v_scale: float,
112
- ) -> None:
113
- ops.reshape_and_cache(
114
- key,
115
- value,
116
- key_cache,
117
- value_cache,
118
- slot_mapping,
119
- kv_cache_dtype,
120
- k_scale,
121
- v_scale,
122
- )
123
-
124
-
125
- def reshape_and_cache_flash(
126
- key: torch.Tensor,
127
- value: torch.Tensor,
128
- key_cache: torch.Tensor,
129
- value_cache: torch.Tensor,
130
- slot_mapping: torch.Tensor,
131
- kv_cache_dtype: str,
132
- k_scale: torch.Tensor,
133
- v_scale: torch.Tensor,
134
- ) -> None:
135
- ops.reshape_and_cache_flash(
136
- key,
137
- value,
138
- key_cache,
139
- value_cache,
140
- slot_mapping,
141
- kv_cache_dtype,
142
- k_scale,
143
- v_scale,
144
- )
145
-
146
-
147
- def copy_blocks(
148
- key_caches: List[torch.Tensor],
149
- value_caches: List[torch.Tensor],
150
- block_mapping: torch.Tensor,
151
- ) -> None:
152
- ops.copy_blocks(key_caches, value_caches, block_mapping)
153
-
154
-
155
- def swap_blocks(
156
- src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
157
- ) -> None:
158
- ops.swap_blocks(src, dst, block_mapping)
159
-
160
-
161
- def convert_fp8(
162
- output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
163
- ) -> None:
164
- ops.convert_fp8(output, input, scale, kv_dtype)
165
-
166
-
167
- __all__ = [
168
- "convert_fp8",
169
- "paged_attention_v1",
170
- "paged_attention_v2",
171
- "reshape_and_cache",
172
- "copy_blocks",
173
- ]
build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
- from ._custom_ops import (
-     convert_fp8,
-     copy_blocks,
-     paged_attention_v1,
-     paged_attention_v2,
-     reshape_and_cache,
-     reshape_and_cache_flash,
-     swap_blocks,
- )
- from ._ops import ops
-
- __all__ = [
-     "convert_fp8",
-     "copy_blocks",
-     "ops",
-     "paged_attention_v1",
-     "paged_attention_v2",
-     "reshape_and_cache",
-     "reshape_and_cache_flash",
-     "swap_blocks",
- ]
build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # page attention ops
9
- def paged_attention_v1(
10
- out: torch.Tensor,
11
- query: torch.Tensor,
12
- key_cache: torch.Tensor,
13
- value_cache: torch.Tensor,
14
- num_kv_heads: int,
15
- scale: float,
16
- block_tables: torch.Tensor,
17
- seq_lens: torch.Tensor,
18
- block_size: int,
19
- max_seq_len: int,
20
- alibi_slopes: Optional[torch.Tensor],
21
- kv_cache_dtype: str,
22
- k_scale: float,
23
- v_scale: float,
24
- tp_rank: int = 0,
25
- blocksparse_local_blocks: int = 0,
26
- blocksparse_vert_stride: int = 0,
27
- blocksparse_block_size: int = 64,
28
- blocksparse_head_sliding_step: int = 0,
29
- ) -> None:
30
- ops.paged_attention_v1(
31
- out,
32
- query,
33
- key_cache,
34
- value_cache,
35
- num_kv_heads,
36
- scale,
37
- block_tables,
38
- seq_lens,
39
- block_size,
40
- max_seq_len,
41
- alibi_slopes,
42
- kv_cache_dtype,
43
- k_scale,
44
- v_scale,
45
- tp_rank,
46
- blocksparse_local_blocks,
47
- blocksparse_vert_stride,
48
- blocksparse_block_size,
49
- blocksparse_head_sliding_step,
50
- )
51
-
52
-
53
- def paged_attention_v2(
54
- out: torch.Tensor,
55
- exp_sum: torch.Tensor,
56
- max_logits: torch.Tensor,
57
- tmp_out: torch.Tensor,
58
- query: torch.Tensor,
59
- key_cache: torch.Tensor,
60
- value_cache: torch.Tensor,
61
- num_kv_heads: int,
62
- scale: float,
63
- block_tables: torch.Tensor,
64
- seq_lens: torch.Tensor,
65
- block_size: int,
66
- max_seq_len: int,
67
- alibi_slopes: Optional[torch.Tensor],
68
- kv_cache_dtype: str,
69
- k_scale: float,
70
- v_scale: float,
71
- tp_rank: int = 0,
72
- blocksparse_local_blocks: int = 0,
73
- blocksparse_vert_stride: int = 0,
74
- blocksparse_block_size: int = 64,
75
- blocksparse_head_sliding_step: int = 0,
76
- ) -> None:
77
- ops.paged_attention_v2(
78
- out,
79
- exp_sum,
80
- max_logits,
81
- tmp_out,
82
- query,
83
- key_cache,
84
- value_cache,
85
- num_kv_heads,
86
- scale,
87
- block_tables,
88
- seq_lens,
89
- block_size,
90
- max_seq_len,
91
- alibi_slopes,
92
- kv_cache_dtype,
93
- k_scale,
94
- v_scale,
95
- tp_rank,
96
- blocksparse_local_blocks,
97
- blocksparse_vert_stride,
98
- blocksparse_block_size,
99
- blocksparse_head_sliding_step,
100
- )
101
-
102
-
103
- def reshape_and_cache(
104
- key: torch.Tensor,
105
- value: torch.Tensor,
106
- key_cache: torch.Tensor,
107
- value_cache: torch.Tensor,
108
- slot_mapping: torch.Tensor,
109
- kv_cache_dtype: str,
110
- k_scale: float,
111
- v_scale: float,
112
- ) -> None:
113
- ops.reshape_and_cache(
114
- key,
115
- value,
116
- key_cache,
117
- value_cache,
118
- slot_mapping,
119
- kv_cache_dtype,
120
- k_scale,
121
- v_scale,
122
- )
123
-
124
-
125
- def reshape_and_cache_flash(
126
- key: torch.Tensor,
127
- value: torch.Tensor,
128
- key_cache: torch.Tensor,
129
- value_cache: torch.Tensor,
130
- slot_mapping: torch.Tensor,
131
- kv_cache_dtype: str,
132
- k_scale: torch.Tensor,
133
- v_scale: torch.Tensor,
134
- ) -> None:
135
- ops.reshape_and_cache_flash(
136
- key,
137
- value,
138
- key_cache,
139
- value_cache,
140
- slot_mapping,
141
- kv_cache_dtype,
142
- k_scale,
143
- v_scale,
144
- )
145
-
146
-
147
- def copy_blocks(
148
- key_caches: List[torch.Tensor],
149
- value_caches: List[torch.Tensor],
150
- block_mapping: torch.Tensor,
151
- ) -> None:
152
- ops.copy_blocks(key_caches, value_caches, block_mapping)
153
-
154
-
155
- def swap_blocks(
156
- src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
157
- ) -> None:
158
- ops.swap_blocks(src, dst, block_mapping)
159
-
160
-
161
- def convert_fp8(
162
- output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
163
- ) -> None:
164
- ops.convert_fp8(output, input, scale, kv_dtype)
165
-
166
-
167
- __all__ = [
168
- "convert_fp8",
169
- "paged_attention_v1",
170
- "paged_attention_v2",
171
- "reshape_and_cache",
172
- "copy_blocks",
173
- ]
build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
- import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
-
- def add_op_namespace_prefix(op_name: str):
-     """
-     Prefix op by namespace.
-     """
-     return f"_paged_attention_daf6221::{op_name}"
build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:684a1670bd113fee852580728961adcc53b7adb8e563aa672df223e7bff0c9a6
- size 87913456
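The `.abi3.so` entries in this commit are Git LFS pointer files: the `oid` is the SHA-256 of the actual shared library and `size` is its byte length. A small sketch for checking a downloaded artifact against its pointer; the path is illustrative.

    import hashlib

    def lfs_oid(path: str) -> str:
        # Stream the file so multi-hundred-MB .so artifacts are not loaded into memory at once.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest()

    # e.g. compare against the pointer deleted above:
    # lfs_oid("paged_attention/_paged_attention_daf6221.abi3.so") == "684a1670...bff0c9a6"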
build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py DELETED
@@ -1,62 +0,0 @@
- import os
- import random
- from abc import ABC, abstractmethod
- from functools import lru_cache, wraps
- from typing import Callable, ParamSpec, TypeVar
-
- import numpy as np
- import torch
-
- IS_ROCM = torch.version.hip is not None
-
-
- class Platform(ABC):
-     @classmethod
-     def seed_everything(cls, seed: int) -> None:
-         """
-         Set the seed of each random module.
-         `torch.manual_seed` will set seed on all devices.
-
-         Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-         """
-         random.seed(seed)
-         np.random.seed(seed)
-         torch.manual_seed(seed)
-
-     @abstractmethod
-     def get_device_name(self, device_id: int = 0) -> str: ...
-
-     @abstractmethod
-     def is_cuda(self) -> bool: ...
-
-     @abstractmethod
-     def is_rocm(self) -> bool: ...
-
-
- class CudaPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(0)
-
-     def is_cuda(self) -> bool:
-         return True
-
-     def is_rocm(self) -> bool:
-         return False
-
-
- class RocmPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(device_id)
-
-     def is_cuda(self) -> bool:
-         return False
-
-     def is_rocm(self) -> bool:
-         return True
-
-
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
- from ._custom_ops import (
-     convert_fp8,
-     copy_blocks,
-     paged_attention_v1,
-     paged_attention_v2,
-     reshape_and_cache,
-     reshape_and_cache_flash,
-     swap_blocks,
- )
- from ._ops import ops
-
- __all__ = [
-     "convert_fp8",
-     "copy_blocks",
-     "ops",
-     "paged_attention_v1",
-     "paged_attention_v2",
-     "reshape_and_cache",
-     "reshape_and_cache_flash",
-     "swap_blocks",
- ]
build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # page attention ops
9
- def paged_attention_v1(
10
- out: torch.Tensor,
11
- query: torch.Tensor,
12
- key_cache: torch.Tensor,
13
- value_cache: torch.Tensor,
14
- num_kv_heads: int,
15
- scale: float,
16
- block_tables: torch.Tensor,
17
- seq_lens: torch.Tensor,
18
- block_size: int,
19
- max_seq_len: int,
20
- alibi_slopes: Optional[torch.Tensor],
21
- kv_cache_dtype: str,
22
- k_scale: float,
23
- v_scale: float,
24
- tp_rank: int = 0,
25
- blocksparse_local_blocks: int = 0,
26
- blocksparse_vert_stride: int = 0,
27
- blocksparse_block_size: int = 64,
28
- blocksparse_head_sliding_step: int = 0,
29
- ) -> None:
30
- ops.paged_attention_v1(
31
- out,
32
- query,
33
- key_cache,
34
- value_cache,
35
- num_kv_heads,
36
- scale,
37
- block_tables,
38
- seq_lens,
39
- block_size,
40
- max_seq_len,
41
- alibi_slopes,
42
- kv_cache_dtype,
43
- k_scale,
44
- v_scale,
45
- tp_rank,
46
- blocksparse_local_blocks,
47
- blocksparse_vert_stride,
48
- blocksparse_block_size,
49
- blocksparse_head_sliding_step,
50
- )
51
-
52
-
53
- def paged_attention_v2(
54
- out: torch.Tensor,
55
- exp_sum: torch.Tensor,
56
- max_logits: torch.Tensor,
57
- tmp_out: torch.Tensor,
58
- query: torch.Tensor,
59
- key_cache: torch.Tensor,
60
- value_cache: torch.Tensor,
61
- num_kv_heads: int,
62
- scale: float,
63
- block_tables: torch.Tensor,
64
- seq_lens: torch.Tensor,
65
- block_size: int,
66
- max_seq_len: int,
67
- alibi_slopes: Optional[torch.Tensor],
68
- kv_cache_dtype: str,
69
- k_scale: float,
70
- v_scale: float,
71
- tp_rank: int = 0,
72
- blocksparse_local_blocks: int = 0,
73
- blocksparse_vert_stride: int = 0,
74
- blocksparse_block_size: int = 64,
75
- blocksparse_head_sliding_step: int = 0,
76
- ) -> None:
77
- ops.paged_attention_v2(
78
- out,
79
- exp_sum,
80
- max_logits,
81
- tmp_out,
82
- query,
83
- key_cache,
84
- value_cache,
85
- num_kv_heads,
86
- scale,
87
- block_tables,
88
- seq_lens,
89
- block_size,
90
- max_seq_len,
91
- alibi_slopes,
92
- kv_cache_dtype,
93
- k_scale,
94
- v_scale,
95
- tp_rank,
96
- blocksparse_local_blocks,
97
- blocksparse_vert_stride,
98
- blocksparse_block_size,
99
- blocksparse_head_sliding_step,
100
- )
101
-
102
-
103
- def reshape_and_cache(
104
- key: torch.Tensor,
105
- value: torch.Tensor,
106
- key_cache: torch.Tensor,
107
- value_cache: torch.Tensor,
108
- slot_mapping: torch.Tensor,
109
- kv_cache_dtype: str,
110
- k_scale: float,
111
- v_scale: float,
112
- ) -> None:
113
- ops.reshape_and_cache(
114
- key,
115
- value,
116
- key_cache,
117
- value_cache,
118
- slot_mapping,
119
- kv_cache_dtype,
120
- k_scale,
121
- v_scale,
122
- )
123
-
124
-
125
- def reshape_and_cache_flash(
126
- key: torch.Tensor,
127
- value: torch.Tensor,
128
- key_cache: torch.Tensor,
129
- value_cache: torch.Tensor,
130
- slot_mapping: torch.Tensor,
131
- kv_cache_dtype: str,
132
- k_scale: torch.Tensor,
133
- v_scale: torch.Tensor,
134
- ) -> None:
135
- ops.reshape_and_cache_flash(
136
- key,
137
- value,
138
- key_cache,
139
- value_cache,
140
- slot_mapping,
141
- kv_cache_dtype,
142
- k_scale,
143
- v_scale,
144
- )
145
-
146
-
147
- def copy_blocks(
148
- key_caches: List[torch.Tensor],
149
- value_caches: List[torch.Tensor],
150
- block_mapping: torch.Tensor,
151
- ) -> None:
152
- ops.copy_blocks(key_caches, value_caches, block_mapping)
153
-
154
-
155
- def swap_blocks(
156
- src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
157
- ) -> None:
158
- ops.swap_blocks(src, dst, block_mapping)
159
-
160
-
161
- def convert_fp8(
162
- output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
163
- ) -> None:
164
- ops.convert_fp8(output, input, scale, kv_dtype)
165
-
166
-
167
- __all__ = [
168
- "convert_fp8",
169
- "paged_attention_v1",
170
- "paged_attention_v2",
171
- "reshape_and_cache",
172
- "copy_blocks",
173
- ]
build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
- import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
-
- def add_op_namespace_prefix(op_name: str):
-     """
-     Prefix op by namespace.
-     """
-     return f"_paged_attention_daf6221::{op_name}"
build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d4fb3e5dd163dfce3d3032ff1969be0e96eff1568312935c8747beb402d6a2fd
- size 88021624
build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py DELETED
@@ -1,62 +0,0 @@
- import os
- import random
- from abc import ABC, abstractmethod
- from functools import lru_cache, wraps
- from typing import Callable, ParamSpec, TypeVar
-
- import numpy as np
- import torch
-
- IS_ROCM = torch.version.hip is not None
-
-
- class Platform(ABC):
-     @classmethod
-     def seed_everything(cls, seed: int) -> None:
-         """
-         Set the seed of each random module.
-         `torch.manual_seed` will set seed on all devices.
-
-         Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-         """
-         random.seed(seed)
-         np.random.seed(seed)
-         torch.manual_seed(seed)
-
-     @abstractmethod
-     def get_device_name(self, device_id: int = 0) -> str: ...
-
-     @abstractmethod
-     def is_cuda(self) -> bool: ...
-
-     @abstractmethod
-     def is_rocm(self) -> bool: ...
-
-
- class CudaPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(0)
-
-     def is_cuda(self) -> bool:
-         return True
-
-     def is_rocm(self) -> bool:
-         return False
-
-
- class RocmPlatform(Platform):
-     @classmethod
-     @lru_cache(maxsize=8)
-     def get_device_name(cls, device_id: int = 0) -> str:
-         return torch.cuda.get_device_name(device_id)
-
-     def is_cuda(self) -> bool:
-         return False
-
-     def is_rocm(self) -> bool:
-         return True
-
-
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (509 Bytes).

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc ADDED
Binary file (4.7 kB).

build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (547 Bytes).
 
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
  import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
+ from . import _paged_attention_6677800
+ ops = torch.ops._paged_attention_6677800
 
  def add_op_namespace_prefix(op_name: str):
      """
      Prefix op by namespace.
      """
-     return f"_paged_attention_daf6221::{op_name}"
+     return f"_paged_attention_6677800::{op_name}"
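The only change in `_ops.py` is the build-hash suffix of the native extension module (`daf6221` to `6677800`). A short sketch of how the two exports are used; the op name passed in is illustrative only.

    from paged_attention._ops import ops, add_op_namespace_prefix

    # Fully qualified schema name, e.g. for registrations or profiler traces:
    qualified = add_op_namespace_prefix("paged_attention_v1")
    # -> "_paged_attention_6677800::paged_attention_v1"

    # Kernels are reached through the torch.ops handle, as _custom_ops.py does:
    # ops.paged_attention_v1(out, query, key_cache, value_cache, ...)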
build/{torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2156391e81ab2d76386131056f595c50c00f0d68c2427ab0872a457385f3c804
- size 87948952
+ oid sha256:08e178e566aba62bbe98ba07bb0f83784095cb0d2fec7946a8d5773ca8e550ae
+ size 91845160
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:14b647a190ade84ab061629eb9cfbf7bf104e653c6389609e85e92fa359a828e
- size 91219624
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py CHANGED
@@ -8,6 +8,7 @@ import numpy as np
  import torch
 
  IS_ROCM = torch.version.hip is not None
+ IS_MPS = torch.backends.mps.is_available()
 
 
  class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
      @abstractmethod
      def is_rocm(self) -> bool: ...
 
+     @abstractmethod
+     def is_mps(self) -> bool: ...
+
 
  class CudaPlatform(Platform):
      @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
      def is_rocm(self) -> bool:
          return False
 
+     def is_mps(self) -> bool:
+         return False
+
 
  class RocmPlatform(Platform):
      @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
      def is_rocm(self) -> bool:
          return True
 
+     def is_mps(self) -> bool:
+         return False
+
+
+ class MpsPlatform(Platform):
+     @classmethod
+     @lru_cache(maxsize=8)
+     def get_device_name(cls, device_id: int = 0) -> str:
+         return torch.cuda.get_device_name(device_id)
+
+     def is_cuda(self) -> bool:
+         return False
+
+     def is_rocm(self) -> bool:
+         return False
+
+     def is_mps(self) -> bool:
+         return True
 
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+ current_platform = (
+     RocmPlatform() if IS_ROCM else
+     MpsPlatform() if IS_MPS else
+     CudaPlatform() if torch.cuda.is_available() else
+     None
+ )
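Two behavioural consequences of the new platforms.py are worth noting for callers: `current_platform` can now be `None` when neither ROCm, MPS nor CUDA is available, and `MpsPlatform.get_device_name` still delegates to `torch.cuda.get_device_name`, so `is_mps()` is the reliable signal on Apple silicon. A minimal consumer sketch, assuming the same import path as the earlier example:

    from paged_attention.platforms import current_platform

    if current_platform is None:
        raise RuntimeError("paged-attention kernels need a CUDA, ROCm or MPS device")

    if current_platform.is_mps():
        backend = "mps"
    elif current_platform.is_rocm():
        backend = "rocm"
    else:
        backend = "cuda"
    print("dispatching paged attention on:", backend)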
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (509 Bytes).

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc ADDED
Binary file (4.7 kB).

build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (547 Bytes).
 
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
  import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
+ from . import _paged_attention_6677800
+ ops = torch.ops._paged_attention_6677800
 
  def add_op_namespace_prefix(op_name: str):
      """
      Prefix op by namespace.
      """
-     return f"_paged_attention_daf6221::{op_name}"
+     return f"_paged_attention_6677800::{op_name}"
build/{torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:353c8fd8c18d99439153c6db3dc20b5f1834d6a6ba64e9d8f9a03cc5c196d2c2
- size 91244024
+ oid sha256:1bc02dd09d997be7c2d3c996d5716ff269b4e4094b6cab70f4ae73c3763c36aa
+ size 88666456
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b7faf4c16cf3cae3d4feead6195ece3f0865e602df4b04804e075697b72d5afa
- size 88040824
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py CHANGED
@@ -8,6 +8,7 @@ import numpy as np
  import torch
 
  IS_ROCM = torch.version.hip is not None
+ IS_MPS = torch.backends.mps.is_available()
 
 
  class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
      @abstractmethod
      def is_rocm(self) -> bool: ...
 
+     @abstractmethod
+     def is_mps(self) -> bool: ...
+
 
  class CudaPlatform(Platform):
      @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
      def is_rocm(self) -> bool:
          return False
 
+     def is_mps(self) -> bool:
+         return False
+
 
  class RocmPlatform(Platform):
      @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
      def is_rocm(self) -> bool:
          return True
 
+     def is_mps(self) -> bool:
+         return False
+
+
+ class MpsPlatform(Platform):
+     @classmethod
+     @lru_cache(maxsize=8)
+     def get_device_name(cls, device_id: int = 0) -> str:
+         return torch.cuda.get_device_name(device_id)
+
+     def is_cuda(self) -> bool:
+         return False
+
+     def is_rocm(self) -> bool:
+         return False
+
+     def is_mps(self) -> bool:
+         return True
 
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+ current_platform = (
+     RocmPlatform() if IS_ROCM else
+     MpsPlatform() if IS_MPS else
+     CudaPlatform() if torch.cuda.is_available() else
+     None
+ )
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (509 Bytes).

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc ADDED
Binary file (4.7 kB).

build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (547 Bytes).
 
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
  import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
+ from . import _paged_attention_6677800
+ ops = torch.ops._paged_attention_6677800
 
  def add_op_namespace_prefix(op_name: str):
      """
      Prefix op by namespace.
      """
-     return f"_paged_attention_daf6221::{op_name}"
+     return f"_paged_attention_6677800::{op_name}"
build/{torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so → torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so} RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d7ddfc4079b69c3e62274797778d5083c0ebbcc9c8d91a13da296d07ea2b4f2e
- size 88057032
+ oid sha256:5077b6b8fffb349c79738c345b02903f643aa9530d10269ea143e8f3125d10e9
+ size 88425448
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_daf6221.abi3.so DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b6b6d748077a864b5f3a9b5973cf453d00a685b70654b5a40630891a86d19230
- size 87803904
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py CHANGED
@@ -8,6 +8,7 @@ import numpy as np
  import torch
 
  IS_ROCM = torch.version.hip is not None
+ IS_MPS = torch.backends.mps.is_available()
 
 
  class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
      @abstractmethod
      def is_rocm(self) -> bool: ...
 
+     @abstractmethod
+     def is_mps(self) -> bool: ...
+
 
  class CudaPlatform(Platform):
      @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
      def is_rocm(self) -> bool:
          return False
 
+     def is_mps(self) -> bool:
+         return False
+
 
  class RocmPlatform(Platform):
      @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
      def is_rocm(self) -> bool:
          return True
 
+     def is_mps(self) -> bool:
+         return False
+
+
+ class MpsPlatform(Platform):
+     @classmethod
+     @lru_cache(maxsize=8)
+     def get_device_name(cls, device_id: int = 0) -> str:
+         return torch.cuda.get_device_name(device_id)
+
+     def is_cuda(self) -> bool:
+         return False
+
+     def is_rocm(self) -> bool:
+         return False
+
+     def is_mps(self) -> bool:
+         return True
 
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+ current_platform = (
+     RocmPlatform() if IS_ROCM else
+     MpsPlatform() if IS_MPS else
+     CudaPlatform() if torch.cuda.is_available() else
+     None
+ )
build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/__init__.py RENAMED
File without changes
build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (510 Bytes).

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc ADDED
Binary file (4.71 kB).

build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (548 Bytes).
 
build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_custom_ops.py RENAMED
File without changes
build/{torch25-cxx11-cu118-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/_ops.py RENAMED
@@ -1,9 +1,9 @@
  import torch
- from . import _paged_attention_daf6221
- ops = torch.ops._paged_attention_daf6221
+ from . import _paged_attention_6677800
+ ops = torch.ops._paged_attention_6677800
 
  def add_op_namespace_prefix(op_name: str):
      """
      Prefix op by namespace.
      """
-     return f"_paged_attention_daf6221::{op_name}"
+     return f"_paged_attention_6677800::{op_name}"
build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b4781f634ed1cceabfe9b00e485748ac4e179714ec5e8a3a67475b5a1072fb5
+ size 133021344
build/{torch25-cxx11-cu121-x86_64-linux → torch26-cxx11-rocm62-x86_64-linux}/paged_attention/platforms.py RENAMED
@@ -8,6 +8,7 @@ import numpy as np
  import torch
 
  IS_ROCM = torch.version.hip is not None
+ IS_MPS = torch.backends.mps.is_available()
 
 
  class Platform(ABC):
@@ -32,6 +33,9 @@ class Platform(ABC):
      @abstractmethod
      def is_rocm(self) -> bool: ...
 
+     @abstractmethod
+     def is_mps(self) -> bool: ...
+
 
  class CudaPlatform(Platform):
      @classmethod
@@ -45,6 +49,9 @@ class CudaPlatform(Platform):
      def is_rocm(self) -> bool:
          return False
 
+     def is_mps(self) -> bool:
+         return False
+
 
  class RocmPlatform(Platform):
      @classmethod
@@ -58,5 +65,28 @@ class RocmPlatform(Platform):
      def is_rocm(self) -> bool:
          return True
 
+     def is_mps(self) -> bool:
+         return False
+
+
+ class MpsPlatform(Platform):
+     @classmethod
+     @lru_cache(maxsize=8)
+     def get_device_name(cls, device_id: int = 0) -> str:
+         return torch.cuda.get_device_name(device_id)
+
+     def is_cuda(self) -> bool:
+         return False
+
+     def is_rocm(self) -> bool:
+         return False
+
+     def is_mps(self) -> bool:
+         return True
 
- current_platform = RocmPlatform() if IS_ROCM else CudaPlatform()
+ current_platform = (
+     RocmPlatform() if IS_ROCM else
+     MpsPlatform() if IS_MPS else
+     CudaPlatform() if torch.cuda.is_available() else
+     None
+ )
build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (509 Bytes).

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc ADDED
Binary file (4.7 kB).

build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc ADDED
Binary file (547 Bytes).