tianxie-sf
committed on
Commit
•
a473726
1
Parent(s):
d61d475
Update tokenization_xgen.py
Browse files
- tokenization_xgen.py +2 -2
tokenization_xgen.py
CHANGED
@@ -134,6 +134,8 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
134 |
):
|
135 |
pad_token_added = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
|
136 |
eos_token_added = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
|
|
|
|
|
137 |
super().__init__(
|
138 |
pad_token=pad_token_added,
|
139 |
eos_token=eos_token_added,
|
@@ -141,8 +143,6 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
141 |
add_special_tokens=add_special_tokens,
|
142 |
**kwargs,
|
143 |
)
|
144 |
-
self.add_eos_token = add_eos_token
|
145 |
-
self.encoder = tiktoken_tokenizer(base="gpt2", pad_token=pad_token, add_special=add_special_tokens)
|
146 |
|
147 |
@property
|
148 |
def vocab_size(self):
|
|
|
134 |
):
|
135 |
pad_token_added = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
|
136 |
eos_token_added = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
|
137 |
+
self.add_eos_token = add_eos_token
|
138 |
+
self.encoder = tiktoken_tokenizer(base="gpt2", pad_token=pad_token, add_special=add_special_tokens)
|
139 |
super().__init__(
|
140 |
pad_token=pad_token_added,
|
141 |
eos_token=eos_token_added,
|
|
|
143 |
add_special_tokens=add_special_tokens,
|
144 |
**kwargs,
|
145 |
)
|
|
|
|
|
146 |
|
147 |
@property
|
148 |
def vocab_size(self):
|