KoichiYasuoka
committed on
Commit
·
227f5cf
1
Parent(s):
8c9cd3c
bug fix
Browse files
mecab.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import os
|
2 |
from transformers import BertTokenizerFast
|
3 |
from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
|
4 |
|
@@ -24,9 +23,10 @@ class BertMecabTokenizerFast(BertTokenizerFast):
|
|
24 |
self.custom_pre_tokenizer=pre_tokenizers.Sequence([pre_tokenizers.PreTokenizer.custom(MecabPreTokenizer(**d)),pre_tokenizers.BertPreTokenizer()])
|
25 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|
26 |
def save_pretrained(self,save_directory,**kwargs):
|
|
|
27 |
import shutil
|
28 |
from tokenizers.pre_tokenizers import Metaspace
|
29 |
-
self._auto_map={"AutoTokenizer":[
|
30 |
self._tokenizer.pre_tokenizer=Metaspace()
|
31 |
super().save_pretrained(save_directory,**kwargs)
|
32 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|
|
|
|
|
1 |
from transformers import BertTokenizerFast
|
2 |
from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
|
3 |
|
|
|
23 |
self.custom_pre_tokenizer=pre_tokenizers.Sequence([pre_tokenizers.PreTokenizer.custom(MecabPreTokenizer(**d)),pre_tokenizers.BertPreTokenizer()])
|
24 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|
25 |
def save_pretrained(self,save_directory,**kwargs):
|
26 |
+
import os
|
27 |
import shutil
|
28 |
from tokenizers.pre_tokenizers import Metaspace
|
29 |
+
self._auto_map={"AutoTokenizer":[None,"mecab.BertMecabTokenizerFast"]}
|
30 |
self._tokenizer.pre_tokenizer=Metaspace()
|
31 |
super().save_pretrained(save_directory,**kwargs)
|
32 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|