import datasets
from transformers import AutoTokenizer

dataset = datasets.load_dataset(  # <1>
    "rotten_tomatoes",  # <1>
    split="train",  # <1>
)  # <1>

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

dataset = dataset.map(  # <2>
    lambda examples: tokenizer(examples["text"]),  # <2>
    batched=True,  # <2>
)  # <2>

...  # <3>
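# A quick sanity check after the map call — a minimal sketch, not part of the
# original listing. The column names assume the standard rotten_tomatoes
# schema ("text", "label") plus the fields a fast BERT tokenizer adds.
print(dataset.column_names)
# ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask']
print(tokenizer.decode(dataset[0]["input_ids"]))  # round-trip the first example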