niuba committed on
Commit
c04c3d0
1 Parent(s): 5bac540

Update tokenization_chatglm.py

Browse files

当使用 vllm + outlines 时，词表会被 sorted() 排序；由于词表的键中同时存在 bytes 和 str，两者无法比较，导致排序报错。

Files changed (1) hide show
  1. tokenization_chatglm.py +1 -1
tokenization_chatglm.py CHANGED
@@ -60,7 +60,7 @@ class ChatGLM4Tokenizer(PreTrainedTokenizer):
60
  def get_vocab(self):
61
  """ Returns vocab as a dict """
62
  vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
63
- vocab.update(self.added_tokens_encoder)
64
  return vocab
65
 
66
  def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str:
 
60
  def get_vocab(self):
61
  """ Returns vocab as a dict """
62
  vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
63
+ vocab.update(dict([(bytes(item[0],'utf-8'),item[1]) for item in self.added_tokens_encoder.items()]))
64
  return vocab
65
 
66
  def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str: