Spaces:
Running
Running
File size: 2,077 Bytes
224a33f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
from esm.tokenization.tokenizer_base import EsmTokenizerBase
class StructureTokenizer(EsmTokenizerBase):
    """Convenience holder for the special token ids used by the
    StructureTokenEncoder and StructureTokenDecoder.

    The class performs no tokenization itself: ``encode`` and ``decode``
    always raise ``NotImplementedError``, as do the string-valued token
    accessors (``mask_token``, ``bos_token``, ``eos_token``, ``pad_token``).
    Only the ``*_token_id`` properties return values, which are looked up in
    the mapping supplied at construction time.
    """

    # Error text shared by every string-valued token accessor.
    _NO_STRING_TOKENS_MSG = (
        "Structure tokens are defined on 3D coordinates, not strings."
    )
    # Error text shared by ``encode`` and ``decode``.
    _USE_ENCODER_DECODER_MSG = (
        "The StructureTokenizer class is provided as a convenience for "
        "accessing special token ids of the StructureTokenEncoder and StructureTokenDecoder.\n"
        "Please use them instead."
    )

    def __init__(self, vq_vae_special_tokens: dict[str, int]):
        """Store the special-token mapping.

        Args:
            vq_vae_special_tokens: Mapping from special token name to its
                integer id. The id properties read the keys ``"MASK"``,
                ``"BOS"``, ``"EOS"``, ``"PAD"`` and ``"CHAINBREAK"``.
                The mapping is kept by reference, not copied.
        """
        self.vq_vae_special_tokens = vq_vae_special_tokens

    def _special_id(self, name: str) -> int:
        """Return the id stored under ``name``; ``KeyError`` if it is absent."""
        return self.vq_vae_special_tokens[name]

    # --- string-valued tokens: not available for structure tokens ---------

    def mask_token(self) -> str:
        """Always raise ``NotImplementedError``; there is no string form."""
        raise NotImplementedError(self._NO_STRING_TOKENS_MSG)

    def bos_token(self) -> str:
        """Always raise ``NotImplementedError``; there is no string form."""
        raise NotImplementedError(self._NO_STRING_TOKENS_MSG)

    def eos_token(self) -> str:
        """Always raise ``NotImplementedError``; there is no string form."""
        raise NotImplementedError(self._NO_STRING_TOKENS_MSG)

    def pad_token(self) -> str:
        """Always raise ``NotImplementedError``; there is no string form."""
        raise NotImplementedError(self._NO_STRING_TOKENS_MSG)

    # --- integer ids: looked up in the mapping given to __init__ ----------

    @property
    def mask_token_id(self) -> int:
        """Id stored under the ``"MASK"`` key."""
        return self._special_id("MASK")

    @property
    def bos_token_id(self) -> int:
        """Id stored under the ``"BOS"`` key."""
        return self._special_id("BOS")

    @property
    def eos_token_id(self) -> int:
        """Id stored under the ``"EOS"`` key."""
        return self._special_id("EOS")

    @property
    def pad_token_id(self) -> int:
        """Id stored under the ``"PAD"`` key."""
        return self._special_id("PAD")

    @property
    def chainbreak_token_id(self) -> int:
        """Id stored under the ``"CHAINBREAK"`` key."""
        return self._special_id("CHAINBREAK")

    # --- encode / decode: intentionally unsupported -----------------------

    def encode(self, *args, **kwargs):
        """Always raise ``NotImplementedError``; arguments are ignored."""
        raise NotImplementedError(self._USE_ENCODER_DECODER_MSG)

    def decode(self, *args, **kwargs):
        """Always raise ``NotImplementedError``; arguments are ignored."""
        raise NotImplementedError(self._USE_ENCODER_DECODER_MSG)
|