# NOTE: the page-status text "Spaces: Runtime error" was captured along with
# this source; it is not part of the program.
# Importing all necessary libraries ------------------------------------------ | |
from PIL import Image | |
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
import torch | |
import torch.nn as nn | |
from torchvision import models, transforms | |
import sys, os, distutils.core | |
import detectron2 | |
from detectron2 import model_zoo | |
from detectron2.utils.logger import setup_logger | |
from detectron2.engine import DefaultPredictor | |
from detectron2.config import get_cfg | |
# Model setup --------------------------------------------------------------- | |
# Put the local ./detectron2 checkout at the front of the import path.
sys.path.insert(0, os.path.abspath("./detectron2"))

# Enable detectron2's logging.
setup_logger()

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Output width of the main model's classification head.
n_classes = 15

# Classification loss object.
criterion = nn.CrossEntropyLoss()
def _build_frozen_resnet18(n_outputs):
    """Return a ResNet-18 with a frozen backbone and a new linear head.

    Args:
        n_outputs: Number of output units for the replacement ``fc`` layer.

    Returns:
        The network, moved to the module-level ``device``.
    """
    net = models.resnet18(pretrained = True)
    # The attribute autograd reads is `requires_grad`; the previous spelling
    # (`require_grad`) only created an unused attribute and froze nothing.
    for param in net.parameters():
        param.requires_grad = False
    # The replacement head is created after the freeze, so it stays trainable.
    net.fc = nn.Linear(net.fc.in_features, n_outputs)
    return net.to(device)


# Main model (n_classes outputs)
model = _build_frozen_resnet18(n_classes)

# Viewpoint model (4 outputs)
model_viewpoint = _build_frozen_resnet18(4)

# Typicality model (5 outputs)
model_typicality = _build_frozen_resnet18(5)

# Kept as a module-level name for backward compatibility: input width of the
# classification heads created above.
n_features = model_typicality.fc.in_features
# Softmax over the class dimension of a (batch, classes) output.
model_Softmax = nn.Softmax(dim = 1)

# Cosine similarity with the module's default settings.
cos = nn.CosineSimilarity()

# Transformations to the test set: resize to 224x224, convert to a tensor,
# then normalise each channel with the mean/std values below.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225],
    ),
])
# Helper functions ---------------------------------------------------------- | |
def accuracy(y_pred, y):
    """Return the fraction of rows whose arg-max prediction equals the label.

    Args:
        y_pred: Score tensor; the arg-max is taken along dimension 1.
        y: Label tensor, reshaped to line up with the arg-max result.

    Returns:
        A scalar float tensor: number of matches divided by ``y.shape[0]``.
    """
    predicted = y_pred.argmax(1, keepdim = True)
    hits = predicted.eq(y.view_as(predicted))
    n_correct = hits.sum()
    return n_correct.float() / y.size(0)
# Store for captured layer outputs, keyed by the name given to getActivation.
activation = {}


def getActivation(name):
    """Build a forward hook that records a layer's output under ``name``.

    Args:
        name: Key under which the detached output is stored in ``activation``.

    Returns:
        A callable taking ``(module, inputs, output)`` that writes
        ``output.detach()`` into ``activation[name]``.
    """
    def hook(module, inputs, output):
        # Only the output is used; it is detached before being stored.
        activation[name] = output.detach()

    return hook
def save_image_locally(image_array_FN, path_FN = "fake.jpg"):
    """Write an array to disk as an image file.

    Args:
        image_array_FN: NumPy array holding the image; it is cast to uint8
            before being handed to PIL.
        path_FN: Destination path (defaults to "fake.jpg").

    Returns:
        None.
    """
    Image.fromarray(image_array_FN.astype(np.uint8)).save(path_FN)
# Prediction ---------------------------------------------------------------- | |
# Label tables, generated from their naming rule.
# Body types, in index order.
_BODY_TYPES = ("Convertible", "Hatchback", "MPV", "SUV", "Saloon")
# Model years, in index order: 2000, 2003, then every year from 2006 to 2018.
_MODEL_YEARS = ("2000", "2003") + tuple(str(year) for year in range(2006, 2019))
# Body types that have no "<body>_2018" entry in classes_dict.
_BODIES_WITHOUT_2018 = frozenset({"Convertible", "Hatchback"})

# body type -> index (0..4)
typicality_dict = {body: idx for idx, body in enumerate(_BODY_TYPES)}

# "<body>_<year>" -> index (0..72), ordered by body type and then by year
classes_dict = {
    label: idx
    for idx, label in enumerate(
        body + "_" + year
        for body in _BODY_TYPES
        for year in _MODEL_YEARS
        if not (year == "2018" and body in _BODIES_WITHOUT_2018)
    )
}

# year -> index (0..14)
years_dict = {year: idx for idx, year in enumerate(_MODEL_YEARS)}
# Instance-segmentation predictor (Mask R-CNN R50-FPN 3x from the detectron2
# model zoo), configured to run on the CPU.

# NOTE(review): `dist` is not used anywhere in the visible code, and
# `distutils` was removed from the standard library in Python 3.12. The call
# is kept to preserve module-level behaviour; confirm whether it is needed.
dist = distutils.core.run_setup("./detectron2/setup.py")

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# detectron2 config keys are upper-case; the lower-case spellings used before
# (`cfg.model...`) do not exist on the config node and raise AttributeError.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # minimum detection score kept
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)
def predict(img_F):
    """Isolate the largest detected car and score its modernity and typicality.

    Steps:
      1. Run the instance-segmentation ``predictor`` and keep the detection
         of class ``target_class`` with the largest mask area.
      2. Paint everything outside that mask white and save the result to
         "fake.jpg".
      3. Classify the viewpoint; stop unless it is an accepted class.
      4. Compute the modernity score as the probability-weighted mean of the
         year indices predicted by ``model``.
      5. Compute the typicality score as the cosine similarity between the
         ``avgpool`` activation of ``model_typicality`` and the entry of
         "morph.pt" for the predicted "<body>_<year>" class.

    Args:
        img_F: Input image as a NumPy array (cast to uint8 before use).

    Returns:
        A ``(text, image)`` tuple: a message or score string, and an image
        array (the placeholder "init.jpg", or the masked car image).
    """
    # Class index that the messages below treat as "car".
    target_class = 2

    img = img_F.astype("uint8")

    # NOTE(review): detectron2's DefaultPredictor documents BGR input unless
    # cfg.INPUT.FORMAT says otherwise; confirm the channel order passed here.
    instances = predictor(img)["instances"]
    pred_classes = instances.pred_classes.tolist()
    # Mask area per detection = number of mask pixels set.
    areas = torch.sum(torch.flatten(instances.pred_masks, start_dim = 1), dim = 1).tolist()

    car_indices = [idx for idx, cls in enumerate(pred_classes) if cls == target_class]
    if not car_indices:
        with Image.open("init.jpg") as placeholder:
            img = np.array(placeholder)
        text_output = "Sorry! I am not able to recognize a car in this image. Please upload a new photo!"
        return text_output, img

    # Largest car by mask area (first one wins on ties).
    global_idx = max(car_indices, key = lambda idx: areas[idx])

    # Keep the pixels inside the mask and paint everything else white.
    mask = instances.pred_masks[global_idx].unsqueeze(-1).to("cpu").bool()
    pixels = torch.from_numpy(img)
    res = torch.where(mask, pixels, torch.full_like(pixels, 255)).numpy()

    # The classifiers read the image back from disk, so they see the
    # JPEG-encoded version of the masked image.
    save_image_locally(res, path_FN = "fake.jpg")
    with Image.open("fake.jpg") as saved:
        img_pred = test_transforms(saved)
    # The models live on `device`, so the input batch must be moved there too.
    batch = img_pred.unsqueeze(0).to(device)

    # --- Viewpoint gate ---
    model_viewpoint.load_state_dict(torch.load("model_viewpoint.pt", map_location = torch.device("cpu")))
    model_viewpoint.eval()
    with torch.no_grad():
        y_pred = model_Softmax(model_viewpoint(batch))
    top_pred = y_pred.argmax(1, keepdim = True)
    # NOTE(review): model_viewpoint has 4 outputs, so class 6 can never be
    # predicted; confirm which viewpoint indices are meant to pass.
    if top_pred.item() not in [0, 6]:
        with Image.open("fake.jpg") as saved:
            img = np.array(saved)
        text_output = "Sorry! I am not able to recognize a frontal view of a car in this image. Please upload a new photo!"
        return text_output, img

    # --- Modernity: expected year index under the predicted distribution ---
    model.load_state_dict(torch.load("model_modernity.pt", map_location = torch.device("cpu")))
    model.eval()
    with torch.no_grad():
        year_probs = model_Softmax(model(batch))
    model_year = year_probs.argmax(1, keepdim = True).item()
    year_indices = torch.arange(
        year_probs.shape[1], dtype = year_probs.dtype, device = year_probs.device
    )
    score_t = torch.mul(year_indices, torch.reshape(year_probs, (-1, ))).sum().item()

    # --- Typicality: one forward pass gives both the class and the avgpool
    # activation captured by the hook ---
    model_typicality.load_state_dict(torch.load("model_typicality.pt", map_location = torch.device("cpu")))
    model_typicality.eval()
    h1 = model_typicality.avgpool.register_forward_hook(getActivation("avgpool"))
    try:
        with torch.no_grad():
            part_probs = model_Softmax(model_typicality(batch))
    finally:
        # Always detach the hook so it cannot pile up across calls.
        h1.remove()
    model_part = part_probs.argmax(1, keepdim = True).item()
    act_pool_t = activation["avgpool"]

    # Map the predicted indices back to their labels.
    model_year = next(label for label, idx in years_dict.items() if idx == model_year)
    model_part = next(label for label, idx in typicality_dict.items() if idx == model_part)

    modernity_txt = "Modernity score: " + str(round(score_t, 2))

    # Not every body/year pair exists in classes_dict (e.g. there is no
    # "Convertible_2018"); report the modernity score alone in that case.
    true_idx = classes_dict.get(model_part + "_" + model_year)
    if true_idx is None:
        return modernity_txt + " | Typicality score: n/a", res

    morph_avg = torch.load("morph.pt", map_location = device)
    cos_t = cos(morph_avg[true_idx], act_pool_t).item()
    txt = modernity_txt + " | Typicality score: " + str(round(cos_t, 2))
    return txt, res
# Launching the app --------------------------------------------------------- | |
# Gradio UI: one image input; outputs are the text from `predict` and an image.
interface = gr.Interface(
    fn = predict,
    inputs = "image",
    outputs = ["text", gr.Image(type = "pil")],
    title = "Let's classify your car!",
)

# Start serving the app.
interface.launch()