tianxie-sf
committed on
Commit
•
1a0f468
1
Parent(s):
3987e09
fix issue in get vocab (#18)
Browse files
- fix issue in get vocab (c2f5b283e79e4d6e9125d8fd1c1a170b17415b7a)
- tokenization_xgen.py +1 -1
tokenization_xgen.py
CHANGED
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
139 |
|
140 |
def get_vocab(self):
|
141 |
"""Returns vocab as a dict"""
|
142 |
-
vocab = {self.
|
143 |
return vocab
|
144 |
|
145 |
def _tokenize(self, text, **kwargs):
|
|
|
139 |
|
140 |
def get_vocab(self):
|
141 |
"""Returns vocab as a dict"""
|
142 |
+
vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
|
143 |
return vocab
|
144 |
|
145 |
def _tokenize(self, text, **kwargs):
|