import datasets
from transformers import AutoTokenizer
dataset = datasets.load_dataset( # <1>
"rotten_tomatoes", # <1>
split="train", # <1>
) # <1>
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
dataset = dataset.map( # <2>
lambda examples: tokenizer(examples["text"]), # <2>
batched=True, # <2>
) # <2>
... # <3>
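
# A minimal sketch of inspecting the result (an illustrative addition, not
# part of the original listing): after .map(), the dataset carries the
# tokenizer's output columns alongside the original "text" and "label".
print(dataset.column_names)
# e.g. ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask']
print(dataset[0]["input_ids"][:10])  # first ten token ids of the first review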