# handler.py -- uploaded by philschmid (HF staff)
# Commit 286858d: "Update handler.py"
import os
import torch
import clip
from utils import MLP, normalized
# Run on the GPU when torch reports CUDA as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
class EndpointHandler:
    """Inference handler that scores an image with a CLIP-based aesthetic predictor.

    On construction the handler loads an ``MLP(768)`` head from
    ``sac+logos+ava1-l14-linearMSE.pth`` and the CLIP ``ViT-L/14`` model.
    Calling the handler embeds one image with CLIP, passes the embedding
    through ``normalized`` and feeds the result to the MLP head.
    """

    def __init__(self, path=""):
        """Load the MLP head and the CLIP model onto the module-level ``device``.

        Args:
            path: Directory containing ``sac+logos+ava1-l14-linearMSE.pth``.
        """
        classifier = MLP(768)
        # map_location remaps the stored tensors onto ``device`` while loading.
        state_dict = torch.load(
            os.path.join(path, "sac+logos+ava1-l14-linearMSE.pth"),
            map_location=device,
        )
        classifier.load_state_dict(state_dict)
        classifier.to(device)
        classifier.eval()

        clip_model, preprocess = clip.load("ViT-L/14", device=device)

        # Everything __call__ needs is kept in a single dict on the instance.
        self.model_dict = {
            "classifier": classifier,
            "clip_model": clip_model,
            "clip_preprocess": preprocess,
            "device": device,
        }

    def __call__(self, data):
        """Compute the aesthetic score of a single image.

        Args:
            data: Request payload. The image is taken from (and removed from)
                its ``"inputs"`` key; when that key is absent the payload
                itself is handed to the CLIP preprocessor. The image is
                expected to be a :obj:`PIL.Image`.

        Returns:
            A :obj:`dict` of the form ``{"aesthetic score": <float>}``.
        """
        target_device = self.model_dict["device"]

        # extract converted PIL image from serialized request
        image = data.pop("inputs", data)
        # unsqueeze(0) adds a leading dimension before the tensor goes to the encoder.
        image_input = self.model_dict["clip_preprocess"](image).unsqueeze(0).to(target_device)

        with torch.no_grad():
            image_features = self.model_dict["clip_model"].encode_image(image_input)
            # ``normalized`` is given a NumPy array, so the features must live
            # in host memory first. ``.cpu()`` is a no-op for tensors already on
            # the CPU, so one code path serves both devices.
            im_emb_arr = normalized(image_features.detach().cpu().numpy())
            # ``.float()`` casts to float32 on whichever device the tensor is
            # on, replacing the legacy per-device ``FloatTensor`` type casts.
            im_emb = torch.from_numpy(im_emb_arr).to(target_device).float()
            prediction = self.model_dict["classifier"](im_emb)

        # ``.item()`` requires a single-element tensor and yields a Python number.
        score = prediction.item()
        return {"aesthetic score": score}