|
from PIL import Image
import torch
from transformers import CLIPModel, AutoProcessor
|
|
|
# Both the model weights and the matching preprocessor come from the same
# pretrained checkpoint, so the identifier is spelled out only once.
_CHECKPOINT = "openai/clip-vit-large-patch14"

model = CLIPModel.from_pretrained(_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CHECKPOINT)

# Candidate text prompts the image is scored against.
labels = [
    "a photo of a cat",
    "a photo of a dog",
]
|
|
|
def classify_image(image_path):
    """
    Perform zero-shot classification on a single image.

    The image is scored against every prompt in the module-level ``labels``
    list, and the image-to-text logits are softmax-normalised across those
    prompts.

    Args:
        image_path (str): Path to the image.

    Returns:
        dict: Maps each entry of ``labels`` to its probability (a float)
            for the image.
    """
    # Image.open is lazy and keeps the underlying file open. convert() reads
    # the pixel data into a new in-memory image, so the file handle can be
    # released as soon as the with-block exits.
    with Image.open(image_path) as image_file:
        image = image_file.convert("RGB")

    inputs = processor(
        text=labels,
        images=image,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax across the text prompts (dim=1); a single image was passed in,
    # so take the first row.
    probs = outputs.logits_per_image.softmax(dim=1)[0]

    return dict(zip(labels, probs.tolist()))
|
|