File size: 351 Bytes
4bb9d41
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
import pytest
from app.model.tokenizer import NigerianLanguageTokenizer
from transformers import AutoTokenizer

def test_tokenizer():
    """Smoke-test ``NigerianLanguageTokenizer.tokenize_batch`` on a one-item batch.

    Loads the pretrained "gpt2" tokenizer via ``AutoTokenizer``, wraps it in
    ``NigerianLanguageTokenizer``, and checks only that tokenizing a single
    sample string yields a non-``None`` result. The structure of the returned
    value is not inspected here.
    """
    # NOTE(review): from_pretrained presumably needs the "gpt2" files cached
    # locally or downloadable at test time — confirm for CI.
    wrapper = NigerianLanguageTokenizer(AutoTokenizer.from_pretrained("gpt2"))

    batch = ["Sample text"]
    encoded = wrapper.tokenize_batch(batch)

    assert encoded is not None