Update tokenization_chatglm.py
Browse files
当使用 vllm + outlines 时,由于会对词表进行排序(sorted),而 bytes 和 str 之间无法比较,导致报错
- tokenization_chatglm.py +1 -1
tokenization_chatglm.py
CHANGED
@@ -60,7 +60,7 @@ class ChatGLM4Tokenizer(PreTrainedTokenizer):
|
|
60 |
def get_vocab(self):
|
61 |
""" Returns vocab as a dict """
|
62 |
vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
|
63 |
-
vocab.update(self.added_tokens_encoder)
|
64 |
return vocab
|
65 |
|
66 |
def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str:
|
|
|
60 |
def get_vocab(self):
|
61 |
""" Returns vocab as a dict """
|
62 |
vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
|
63 |
+
vocab.update(dict([(bytes(item[0],'utf-8'),item[1]) for item in self.added_tokens_encoder.items()]))
|
64 |
return vocab
|
65 |
|
66 |
def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str:
|