tianxie-sf committed on
Commit
c2f5b28
1 Parent(s): 3987e09

Fix issue in get_vocab

Browse files
Files changed (1) hide show
  1. tokenization_xgen.py +1 -1
tokenization_xgen.py CHANGED
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
- vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):
 
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
+ vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):