Spaces:
Sleeping
Sleeping
File size: 429 Bytes
4bb9d41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
from typing import Dict, List, Optional

from transformers import PreTrainedTokenizerFast
class NigerianLanguageTokenizer:
    """Thin wrapper around a fast Hugging Face tokenizer for batch encoding.

    Holds a pre-trained ``PreTrainedTokenizerFast`` and exposes a single
    convenience method that encodes a list of raw strings into padded,
    truncated PyTorch tensors ready for a model forward pass.
    """

    def __init__(self, base_tokenizer: PreTrainedTokenizerFast):
        # The underlying HF tokenizer that performs the actual encoding.
        self.tokenizer = base_tokenizer

    def tokenize_batch(self, texts: List[str], max_length: Optional[int] = None) -> Dict:
        """Tokenize a batch of texts into a dict of PyTorch tensors.

        Args:
            texts: Raw input strings to encode as one batch.
            max_length: Optional cap on token sequence length. When ``None``
                (the default, matching the previous hard-coded behavior) the
                tokenizer truncates at its own ``model_max_length``.

        Returns:
            A ``BatchEncoding`` (dict-like) with ``input_ids`` and
            ``attention_mask`` tensors, padded to the longest sequence
            in the batch.
        """
        return self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
        )