hatespeech / hscommon.py
hm-auch
update init app, intro first common file for refactoring
86814fc
raw
history blame
478 Bytes
def encode(sentences, tokenizer, sequence_length):
    """Tokenize a batch of sentences into fixed-length model inputs.

    Every sentence is padded or truncated to exactly ``sequence_length``
    tokens so the batch can be returned as rectangular tensors.

    Args:
        sentences: Batch of input texts, forwarded unchanged to
            ``tokenizer.batch_encode_plus``.
        tokenizer: Object exposing a Hugging Face style
            ``batch_encode_plus`` method.
            NOTE(review): the ``padding``/``truncation`` keywords assume
            transformers >= 3.0 -- confirm against the pinned version.
        sequence_length: Target length (in tokens) of every encoded row.

    Returns:
        Whatever ``tokenizer.batch_encode_plus`` returns. TensorFlow tensors
        are requested via ``return_tensors='tf'``, with an attention mask
        and without token type ids.
    """
    return tokenizer.batch_encode_plus(
        sentences,
        max_length=sequence_length,  # target length of every sequence
        add_special_tokens=True,  # add [CLS] and [SEP] tokens
        return_attention_mask=True,
        return_token_type_ids=False,  # not needed for this type of ML task
        # Replaces the deprecated `pad_to_max_length=True`: pad shorter
        # sequences up to max_length with the tokenizer's pad token.
        padding='max_length',
        # Explicit truncation: the implicit fallback only applies when
        # `padding` is unset, and over-long rows would break tensor creation.
        truncation=True,
        return_tensors='tf',
    )