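# NOTE(review): the three lines below look like web-page status residue
# captured during extraction, not part of the module -- confirm and remove.
# Spaces:
# Sleeping
# Sleeping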
"""Token blocks.""" | |
from typing import List | |
from vllm.utils import Device | |
# Placeholder stored in every slot of a logical block that has not been
# written yet.
_BLANK_TOKEN_ID = -1


class LogicalTokenBlock:
    """A block that stores a contiguous chunk of tokens from left to right.

    Logical blocks are used to represent the states of the corresponding
    physical blocks in the KV cache.

    Attributes are plain instance fields:
        block_number: Number identifying this block, as given by the caller.
        block_size: Maximum number of tokens the block can hold.
        token_ids: Fixed-length list of ``block_size`` slots; unwritten
            slots hold ``_BLANK_TOKEN_ID``.
        num_tokens: Number of slots written so far (always a prefix of
            ``token_ids``).
    """

    def __init__(
        self,
        block_number: int,
        block_size: int,
    ) -> None:
        """Create an empty block with ``block_size`` blank slots."""
        self.block_number = block_number
        self.block_size = block_size

        # The backing list is allocated at full size up front; appends
        # overwrite blank slots in place rather than growing the list.
        self.token_ids = [_BLANK_TOKEN_ID] * block_size
        self.num_tokens = 0

    def is_empty(self) -> bool:
        """Return True if no token has been appended yet."""
        return self.num_tokens == 0

    def get_num_empty_slots(self) -> int:
        """Return how many more tokens can be appended."""
        return self.block_size - self.num_tokens

    def is_full(self) -> bool:
        """Return True if every slot has been written."""
        return self.num_tokens == self.block_size

    def append_tokens(self, token_ids: List[int]) -> None:
        """Write ``token_ids`` into the next free slots, in order.

        Raises:
            AssertionError: If ``token_ids`` has more elements than there
                are empty slots left in the block.
        """
        num_new = len(token_ids)
        assert num_new <= self.get_num_empty_slots(), (
            f"cannot append {num_new} tokens: only "
            f"{self.get_num_empty_slots()} empty slots left")

        # Slice assignment with equal-length sides keeps the list at
        # exactly ``block_size`` entries.
        curr_idx = self.num_tokens
        self.token_ids[curr_idx:curr_idx + num_new] = token_ids
        self.num_tokens += num_new

    def get_token_ids(self) -> List[int]:
        """Return a new list holding only the tokens written so far."""
        return self.token_ids[:self.num_tokens]

    def get_last_token_id(self) -> int:
        """Return the most recently appended token.

        Raises:
            AssertionError: If the block is empty.
        """
        assert self.num_tokens > 0, "block is empty"
        return self.token_ids[self.num_tokens - 1]
class PhysicalTokenBlock:
    """Represents the state of a block in the KV cache.

    Attributes are plain instance fields:
        device: The ``Device`` this block belongs to, as given by the caller.
        block_number: Number identifying this block, as given by the caller.
        block_size: Size of the block, as given by the caller.
        ref_count: Reference counter; starts at 0 and is not modified by
            this class itself.
    """

    def __init__(
        self,
        device: Device,
        block_number: int,
        block_size: int,
    ) -> None:
        """Record the block's identity and start with a zero ref count."""
        self.device = device
        self.block_number = block_number
        self.block_size = block_size

        # NOTE(review): presumably incremented/decremented by whoever
        # allocates and frees blocks -- that code is not visible here.
        self.ref_count = 0

    def __repr__(self) -> str:
        """Return a debug string; ``block_size`` is intentionally omitted
        here only in the sense that the original format did not include it.
        """
        return (f'PhysicalTokenBlock(device={self.device}, '
                f'block_number={self.block_number}, '
                f'ref_count={self.ref_count})')
# Mapping: logical block number -> physical block.
# The list index plays the role of the logical block number; the element
# stored at that index is the PhysicalTokenBlock it maps to.
BlockTable = List[PhysicalTokenBlock]