test-deploy-dify / custom_tokenizer.py
phamduyphuong251's picture
Upload 16 files
0764b1c verified
raw
history blame contribute delete
325 Bytes
from transformers import PhobertTokenizer
from pyvi import ViTokenizer
class CustomPhobertTokenizer(PhobertTokenizer):
    """PhobertTokenizer variant that segments text with pyvi before tokenizing.

    The raw input of ``_tokenize`` is first passed through ``rdr_segment``
    and only the segmented result is handed to the parent tokenizer.
    """

    def rdr_segment(self, text):
        """Return ``text`` after running it through ``ViTokenizer.tokenize``.

        NOTE(review): the method name suggests RDRSegmenter, but the
        implementation delegates to pyvi's ``ViTokenizer``.
        """
        segmented = ViTokenizer.tokenize(text)
        return segmented

    def _tokenize(self, text):
        """Segment ``text`` with ``rdr_segment``, then defer to the parent.

        Returns whatever ``PhobertTokenizer._tokenize`` produces for the
        segmented text.
        """
        # Go through self.rdr_segment so subclasses can swap the segmenter.
        return super()._tokenize(self.rdr_segment(text))