from datasets import load_dataset
from transformers import CLIPProcessor

# NOTE: the bare "coco" identifier relies on a loading script and a manual
# download of the COCO images; depending on your `datasets` version you may
# need a hub-hosted mirror (e.g. "HuggingFaceM4/COCO") instead.
dataset = load_dataset("coco", split="train")

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def preprocess_data(examples):
    # Tokenize captions and convert images to pixel values in one call.
    # padding="max_length" pads every caption to CLIP's fixed 77-token context
    # and truncation=True clips longer ones, so rows batch cleanly downstream.
    # Column names ("caption", "image") depend on the chosen COCO build.
    return processor(
        text=examples["caption"],
        images=examples["image"],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
    )

# batched=True passes lists of captions/images to the processor at once;
# remove_columns drops the raw columns so only model inputs remain.
dataset = dataset.map(preprocess_data, batched=True, remove_columns=dataset.column_names)
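
# --- Usage sketch (illustrative, not part of the original script) ---
# A minimal example of feeding the processed rows to CLIP through a PyTorch
# DataLoader; it assumes the map above produced the "input_ids",
# "attention_mask", and "pixel_values" columns that CLIPProcessor returns.
import torch
from torch.utils.data import DataLoader
from transformers import CLIPModel

dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "pixel_values"])
loader = DataLoader(dataset, batch_size=8, shuffle=True)

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
batch = next(iter(loader))
with torch.no_grad():
    out = model(**batch)
# logits_per_image is (batch, batch): similarity of each image to each caption.
print(out.logits_per_image.shape)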