|
import os |
|
import torch |
|
import clip |
|
from utils import MLP, normalized |
|
|
|
|
|
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
class EndpointHandler:
    """Inference handler that scores a single image with a CLIP-based predictor.

    The handler keeps four objects in ``self.model_dict``:

    * ``"classifier"``      -- the ``MLP`` head loaded from the checkpoint file
    * ``"clip_model"``      -- the CLIP ``ViT-L/14`` model
    * ``"clip_preprocess"`` -- the preprocessing callable returned by ``clip.load``
    * ``"device"``          -- the ``torch.device`` everything runs on
    """

    def __init__(self, path=""):
        """Load the classifier head and the CLIP backbone onto ``device``.

        Args:
            path: Directory that contains ``sac+logos+ava1-l14-linearMSE.pth``.
        """
        # 768 is presumably the CLIP ViT-L/14 embedding width -- confirm
        # against the MLP definition in utils.
        classifier = MLP(768)

        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        state_dict = torch.load(
            os.path.join(path, "sac+logos+ava1-l14-linearMSE.pth"),
            map_location=device,
        )
        classifier.load_state_dict(state_dict)
        classifier.to(device)
        classifier.eval()

        clip_model, preprocess = clip.load("ViT-L/14", device=device)

        self.model_dict = {
            "classifier": classifier,
            "clip_model": clip_model,
            "clip_preprocess": preprocess,
            "device": device,
        }

    def __call__(self, data):
        """Compute the score for one image.

        Args:
            data: Request payload. The image is taken (and removed) from its
                ``"inputs"`` key; if that key is missing, ``data`` itself is
                handed to the CLIP preprocessing callable.

        Returns:
            dict: ``{"aesthetic score": <float>}``.
        """
        target_device = self.model_dict["device"]

        image = data.pop("inputs", data)
        preprocess = self.model_dict["clip_preprocess"]
        # unsqueeze(0) adds the leading batch dimension for a single image.
        image_input = preprocess(image).unsqueeze(0).to(target_device)

        with torch.no_grad():
            image_features = self.model_dict["clip_model"].encode_image(image_input)

            # normalized() is fed a NumPy array, so move the features to host
            # memory first. .cpu() is a no-op for tensors already on the CPU,
            # which lets one code path serve both CUDA and CPU devices.
            im_emb_arr = normalized(image_features.detach().cpu().numpy())

            # Cast to float32 on the target device; this replaces the legacy
            # torch.cuda.FloatTensor / torch.FloatTensor typed-tensor casts.
            im_emb = torch.from_numpy(im_emb_arr).to(
                device=target_device, dtype=torch.float32
            )

            prediction = self.model_dict["classifier"](im_emb)

        return {"aesthetic score": prediction.item()}
|
|