from PIL import Image
import numpy as np
import vision_transformer
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from huggingface_hub import PyTorchModelHubMixin


class SP22M(nn.Module, PyTorchModelHubMixin):
    """ViT-small feature extractor, loadable from the Hugging Face Hub.

    Wraps ``vision_transformer.vit_small`` with ``num_classes=0`` (no
    classification head), so ``forward`` returns the encoder's embedding
    for each input image. ``PyTorchModelHubMixin`` provides
    ``from_pretrained`` for downloading the published weights.
    """

    def __init__(self):
        super().__init__()
        # num_classes=0 drops the classification head; the encoder then
        # outputs feature embeddings rather than class logits.
        self.encoder = vision_transformer.vit_small(num_classes=0)

    def forward(self, x):
        """Return the encoder embedding for a batch of images ``x``.

        Args:
            x: image batch tensor — presumably (N, 3, 224, 224) given the
               transform below; exact shape contract depends on
               ``vision_transformer.vit_small`` (TODO: confirm).
        """
        return self.encoder(x)


# Download model weights from the Hugging Face Hub.
model = SP22M.from_pretrained("MountSinaiCompPath/SP22M")
# Fix: switch to eval mode for inference. torch.no_grad() alone only
# disables autograd; dropout/normalization layers would otherwise keep
# their training-mode behavior.
model.eval()

# Set up transform: to tensor, then normalize with ImageNet mean/std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Synthetic test image: random 224x224 RGB, converted to a PIL Image,
# transformed, and given a leading batch dimension.
img = np.random.randint(0, 256, size=224 * 224 * 3).reshape(224, 224, 3).astype(np.uint8)
img = Image.fromarray(img)
img = transform(img).unsqueeze(0)

# Inference without gradient tracking.
with torch.no_grad():
    h = model(img)