Update tokenization_chatglm.py

#3
Files changed (1)
  1. tokenization_chatglm.py +5 -3
tokenization_chatglm.py CHANGED
@@ -62,14 +62,16 @@ class ChatGLM4Tokenizer(PreTrainedTokenizer):
62
  vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
63
  vocab.update(self.added_tokens_encoder)
64
  return vocab
65
-
66
- def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str:
67
  """
68
  Converts a sequence of tokens in a single string.
69
  """
70
  text = ""
71
  temp = b""
72
  for t in tokens:
 
 
73
  if isinstance(t, str):
74
  if temp:
75
  text += temp.decode("utf-8", errors="replace")
@@ -78,7 +80,7 @@ class ChatGLM4Tokenizer(PreTrainedTokenizer):
78
  elif isinstance(t, bytes):
79
  temp += t
80
  else:
81
- raise TypeError("token should only be of type types or str")
82
  if temp:
83
  text += temp.decode("utf-8", errors="replace")
84
  return text
 
62
  vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
63
  vocab.update(self.added_tokens_encoder)
64
  return vocab
65
+
66
+ def convert_tokens_to_string(self, tokens: List[Union[bytes, str, int]]) -> str:
67
  """
68
  Converts a sequence of tokens in a single string.
69
  """
70
  text = ""
71
  temp = b""
72
  for t in tokens:
73
+ if isinstance(t, int):
74
+ t = chr(t)
75
  if isinstance(t, str):
76
  if temp:
77
  text += temp.decode("utf-8", errors="replace")
 
80
  elif isinstance(t, bytes):
81
  temp += t
82
  else:
83
+ raise TypeError("token should only be of type int, bytes or str")
84
  if temp:
85
  text += temp.decode("utf-8", errors="replace")
86
  return text