This is the official Hugging Face repository for PathCLIP.

Usage

import torch
from PIL import Image

import open_clip

# Load the model, its image preprocessing transform, and the matching tokenizer.
# Use the GPU when one is available and fall back to the CPU otherwise.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-B-16',
    pretrained='your_path/pathclip-base.pt',  # local path to the PathCLIP checkpoint
    cache_dir='your_cache_dir',  # placeholder: replace with your own open_clip cache directory
    force_quick_gelu=True,
)
tokenizer = open_clip.get_tokenizer('ViT-B-16')
model = model.to(device)
# open_clip hands the model back in train mode; switch to eval mode for inference.
model.eval()

# Load the image and prepare the text prompts.
img_path = 'your_img_path'
label_description_list = ['label description1', 'label description2', 'label description3']  # specify the label descriptions
text_label_list = [f'An image of {description}' for description in label_description_list]
# The context manager closes the image file once it has been converted to a tensor.
with Image.open(img_path) as pil_image:
    image = preprocess(pil_image).unsqueeze(0).to(device)  # add a leading batch dimension
text = tokenizer(text_label_list).to(device)

# Extract the image and text features and predict the label.
# Mixed precision is only enabled on CUDA; on the CPU the model runs in its native dtype.
with torch.no_grad(), torch.autocast(device_type=device, enabled=(device == 'cuda')):
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # L2-normalise both embeddings so the dot product below is a cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    # One row per image, one column per prompt; softmax turns similarities into probabilities.
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    # Index into label_description_list of the most probable prompt for the single input image.
    predict_label = text_probs.argmax(dim=-1).item()
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no library tag.