File size: 325 Bytes
c1d85a2 |
1 2 3 4 5 6 7 8 9 10 11 12 |
from transformers import PhobertTokenizer
from pyvi import ViTokenizer
class CustomPhobertTokenizer(PhobertTokenizer):
    """PhoBERT tokenizer that word-segments its input before tokenizing.

    Every string passed to ``_tokenize`` is first run through
    ``rdr_segment`` and only then handed to the parent tokenizer.
    """

    def rdr_segment(self, text: str) -> str:
        """Return *text* after word segmentation.

        Note that, despite the method name, the segmentation is delegated
        to pyvi's ``ViTokenizer.tokenize``.
        """
        segmented = ViTokenizer.tokenize(text)
        return segmented

    def _tokenize(self, text: str):
        """Segment *text*, then tokenize it with the parent implementation.

        Segmentation goes through ``self.rdr_segment`` so that a subclass
        overriding that method changes the pre-processing used here too.
        The return value is whatever ``PhobertTokenizer._tokenize`` yields.
        """
        return super()._tokenize(self.rdr_segment(text))
|