tianxie-sf
committed on
Commit
•
c2f5b28
1
Parent(s):
3987e09
fix issue in get vocab
Browse files- tokenization_xgen.py +1 -1
tokenization_xgen.py
CHANGED
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
139 |
|
140 |
def get_vocab(self):
|
141 |
"""Returns vocab as a dict"""
|
142 |
-
vocab = {self.
|
143 |
return vocab
|
144 |
|
145 |
def _tokenize(self, text, **kwargs):
|
|
|
139 |
|
140 |
def get_vocab(self):
|
141 |
"""Returns vocab as a dict"""
|
142 |
+
vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
|
143 |
return vocab
|
144 |
|
145 |
def _tokenize(self, text, **kwargs):
|