Upload tokenizer
Browse files — tokenizer_script.py (+12, −0)
tokenizer_script.py
CHANGED
@@ -109,6 +109,18 @@ class CharacterTokenizer(PreTrainedTokenizer):
|
|
109 |
|
110 |
def convert_tokens_to_string(self, tokens):
    """Reassemble a sequence of character tokens into the original string.

    Because this tokenizer is character-level, each token is a single
    character, so plain concatenation recovers the text exactly.
    """
    joined = "".join(tokens)
    return joined
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
@classmethod
|
114 |
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|
|
|
109 |
|
110 |
def convert_tokens_to_string(self, tokens):
    """Concatenate character-level tokens back into one string."""
    # Each token is one character for this tokenizer, so an empty-separator
    # join is a lossless inverse of tokenization.
    return str.join("", tokens)
|
112 |
+
|
113 |
+
|
114 |
+
@classmethod
def from_json(cls, vocab_file, **kwargs):
    """Build a tokenizer from a JSON vocabulary file.

    Reads *vocab_file* as UTF-8 JSON and forwards the parsed vocabulary,
    together with any extra keyword arguments, to the class constructor.
    """
    with open(vocab_file, encoding='utf-8') as handle:
        loaded_vocab = json.load(handle)
    return cls(vocab=loaded_vocab, **kwargs)
|
120 |
+
|
121 |
+
@classmethod
def from_vocab(cls, vocab, **kwargs):
    """Construct a tokenizer directly from an in-memory vocabulary mapping."""
    tokenizer = cls(vocab=vocab, **kwargs)
    return tokenizer
|
124 |
|
125 |
@classmethod
|
126 |
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|