def encode(sentences, tokenizer, sequence_length):
    """Tokenize a batch of sentences into fixed-length TensorFlow tensors."""
    return tokenizer.batch_encode_plus(
        sentences,
        max_length=sequence_length,   # fixed length of the output sequences
        add_special_tokens=True,      # add [CLS] and [SEP] tokens
        return_attention_mask=True,   # mask separates real tokens from padding
        return_token_type_ids=False,  # segment IDs are not needed for this task
        padding='max_length',         # pad shorter sequences with 0s up to max_length
        truncation=True,              # truncate longer sequences to max_length
        return_tensors='tf',          # return TensorFlow tensors
    )
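

# A minimal usage sketch, not part of the original file: it assumes the
# Hugging Face transformers package (with TensorFlow installed) and a BERT
# tokenizer; the model name, sentences, and sequence length below are
# illustrative only.
if __name__ == '__main__':
    from transformers import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    sentences = ['The movie was great.', 'I did not enjoy it at all.']

    encoded = encode(sentences, tokenizer, sequence_length=32)
    print(encoded['input_ids'].shape)       # (2, 32)
    print(encoded['attention_mask'].shape)  # (2, 32)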