Update tokenizer_script.py
Browse files · tokenizer_script.py: +1 −1
tokenizer_script.py
CHANGED
@@ -90,7 +90,7 @@ class CharacterTokenizer(PreTrainedTokenizer):
 90           # properly handle padding side
 91           pad_id = self.vocab.get(self.pad_token, 0)
 92           max_len = max(len(ids) for ids in encoded_texts) if max_length is None else max_length
 93 -         if
 94               encoded_texts = [ids + [pad_id] * (max_len - len(ids)) for ids in encoded_texts]
 95           else:
 96               encoded_texts = [[pad_id] * (max_len - len(ids)) + ids for ids in encoded_texts]
 90           # properly handle padding side
 91           pad_id = self.vocab.get(self.pad_token, 0)
 92           max_len = max(len(ids) for ids in encoded_texts) if max_length is None else max_length
 93 +         if self.padding_side == "right":
 94               encoded_texts = [ids + [pad_id] * (max_len - len(ids)) for ids in encoded_texts]
 95           else:
 96               encoded_texts = [[pad_id] * (max_len - len(ids)) + ids for ids in encoded_texts]