from datasets import load_dataset
from transformers import AutoTokenizer

# Instantiate a tokenizer; "bert-base-uncased" is an example checkpoint,
# swap in whichever model you plan to fine-tune
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Load a dataset from a CSV file
dataset = load_dataset("csv", data_files="data.csv")

# Tokenize the "text" column, padding and truncating to the model's max length
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# Apply the tokenizer to every example, processing in batches for speed
tokenized_dataset = dataset.map(tokenize_function, batched=True)
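
# Sanity check (illustrative addition): with a single data file, the "csv"
# loader returns a DatasetDict containing one "train" split, and each
# tokenized row gains input_ids and attention_mask columns alongside the
# original text
print(tokenized_dataset["train"][0].keys())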