# DS405B_space / app.py
# lneduchal's picture
# Update app.py
# bb5cb3f
# Importing all necessary libraries ------------------------------------------
from PIL import Image
import gradio as gr
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import models, transforms
import sys, os, distutils.core
import detectron2
from detectron2 import model_zoo
from detectron2.utils.logger import setup_logger
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
# Model setup ---------------------------------------------------------------
sys.path.insert(0, os.path.abspath("./detectron2"))
setup_logger()

# Use the GPU when torch reports one, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_classes = 15
criterion = nn.CrossEntropyLoss()


def _build_resnet18_classifier(n_outputs):
    """Return a ResNet-18 on ``device`` whose last layer has ``n_outputs`` units.

    Every parameter of the pretrained network is frozen, then the final
    ``fc`` layer is replaced by a fresh ``nn.Linear`` (which is created
    after the freeze and therefore keeps its default trainable state).
    """
    net = models.resnet18(pretrained=True)
    for param in net.parameters():
        # The tensor attribute is ``requires_grad``. The earlier spelling
        # ``require_grad`` only attached an unrelated attribute to each
        # parameter, so nothing was actually frozen.
        param.requires_grad = False
    net.fc = nn.Linear(net.fc.in_features, n_outputs)
    return net.to(device)


# Main model
model = _build_resnet18_classifier(n_classes)

# Viewpoint model
model_viewpoint = _build_resnet18_classifier(4)

# Typicality model
model_typicality = _build_resnet18_classifier(5)

# Input width of the classification heads; kept as a module-level name
# because the original script defined it.
n_features = model_typicality.fc.in_features

model_Softmax = nn.Softmax(dim=1)
cos = nn.CosineSimilarity()

# Transformations to the test set
test_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)
# Helper functions ----------------------------------------------------------
def accuracy(y_pred, y):
    """Return the share of rows in ``y_pred`` whose arg-max equals the label.

    Args:
        y_pred: tensor of per-class scores; the arg-max is taken along dim 1.
        y: tensor of labels, reshaped to line up with the predictions.

    Returns:
        A float tensor holding ``correct / y.shape[0]``.
    """
    predicted = y_pred.argmax(1, keepdim=True)
    targets = y.view_as(predicted)
    n_correct = predicted.eq(targets).sum()
    return n_correct.float() / y.shape[0]
# Filled by the hooks produced by getActivation: hook name -> detached output.
activation = {}


def getActivation(name):
    """Create a forward hook that stores a module's output under ``name``.

    The returned callable takes ``(module, inputs, output)`` and writes
    ``output.detach()`` into the module-level ``activation`` dict; the first
    two arguments are ignored.
    """
    def _store_output(_module, _inputs, output):
        activation[name] = output.detach()

    return _store_output
def save_image_locally(image_array_FN, path_FN = "fake.jpg"):
    """Write ``image_array_FN`` to ``path_FN`` as an image file.

    The array is cast to ``uint8`` before it is handed to PIL. Nothing is
    returned.
    """
    Image.fromarray(image_array_FN.astype(np.uint8)).save(path_FN)
# Prediction ----------------------------------------------------------------
# Label tables, each mapping a label string to its integer index.
_BODY_TYPES = ("Convertible", "Hatchback", "MPV", "SUV", "Saloon")
_YEARS = (
    "2000", "2003", "2006", "2007", "2008", "2009", "2010", "2011",
    "2012", "2013", "2014", "2015", "2016", "2017", "2018",
)
# Body-type/year pairs that have no entry in the combined class table.
_MISSING_CLASSES = {("Convertible", "2018"), ("Hatchback", "2018")}

typicality_dict = {body: idx for idx, body in enumerate(_BODY_TYPES)}

# "<body>_<year>" labels, numbered body type by body type in year order.
_CLASS_LABELS = [
    body + "_" + year
    for body in _BODY_TYPES
    for year in _YEARS
    if (body, year) not in _MISSING_CLASSES
]
classes_dict = {label: idx for idx, label in enumerate(_CLASS_LABELS)}

years_dict = {year: idx for idx, year in enumerate(_YEARS)}
# Runs ./detectron2/setup.py through distutils and keeps the result; nothing
# in the visible part of this file reads ``dist`` afterwards.
dist = distutils.core.run_setup("./detectron2/setup.py")

# Mask R-CNN (R50-FPN, 3x schedule) instance-segmentation predictor.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# detectron2 config keys are upper-case (MODEL.ROI_HEADS.SCORE_THRESH_TEST,
# MODEL.WEIGHTS, MODEL.DEVICE). The lower-case spellings used before are not
# keys of the config node, so attribute access on them fails.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)
def _largest_instance_of_class(pred_classes, areas, target_class):
    """Return the index of the largest-area detection of ``target_class``, or None."""
    candidates = [idx for idx, cls in enumerate(pred_classes) if cls == target_class]
    if not candidates:
        return None
    # max() keeps the first of several equal maxima, like list.index(max(...)).
    return max(candidates, key=lambda idx: areas[idx])


def _label_for_index(mapping, index):
    """Return the key of ``mapping`` whose value equals ``index``."""
    return next(label for label, value in mapping.items() if value == index)


def predict(img_F):
    """Segment the largest car in ``img_F`` and score it.

    Steps: run the instance-segmentation predictor, keep the largest mask of
    the target class, paint everything outside it white, then feed the
    result to the viewpoint, modernity and typicality classifiers.

    Args:
        img_F: image array providing ``.astype``; it is interpreted as RGB.

    Returns:
        A ``(text, image)`` pair. ``text`` is either an apology message
        (no car found / viewpoint rejected) or the score summary string;
        ``image`` is a NumPy array (placeholder, masked car, or masked car
        as re-read from ``fake.jpg``).
    """
    # Class id treated as "car"; presumably index 2 of the COCO categories
    # used by the segmentation model - confirm against the model metadata.
    target_class = 2

    img = np.array(Image.fromarray(img_F.astype("uint8"), "RGB"))

    # NOTE(review): the array is passed on in RGB order; confirm this matches
    # the channel order the predictor is configured for.
    instances = predictor(img)["instances"]
    masks = instances.pred_masks
    pred_classes = instances.pred_classes.tolist()
    areas = torch.sum(torch.flatten(masks, start_dim=1), dim=1).tolist()

    global_idx = _largest_instance_of_class(pred_classes, areas, target_class)
    if global_idx is None:
        img = np.array(Image.open("init.jpg"))
        text_output = "Sorry! I am not able to recognize a car in this image. Please upload a new photo!"
        return text_output, img

    # Keep the pixels inside the chosen mask and set all others to 255.
    # The result stays uint8 (values are 0..255 by construction).
    car_mask = masks[global_idx].unsqueeze(-1).to("cpu").bool()
    pixels = torch.tensor(img)
    res = torch.where(car_mask, pixels, torch.full_like(pixels, 255)).numpy()

    # The classifiers read the image back from disk, so they see the file as
    # encoded by save_image_locally rather than the in-memory array.
    save_image_locally(res, path_FN="fake.jpg")
    img_pred = test_transforms(Image.open("fake.jpg"))
    # The classifiers live on ``device``; the input batch must be there too.
    batch = img_pred.unsqueeze(0).to(device)

    model_viewpoint.load_state_dict(torch.load("model_viewpoint.pt", map_location=torch.device("cpu")))
    model_viewpoint.eval()
    with torch.no_grad():
        viewpoint_probs = model_Softmax(model_viewpoint(batch))
    top_pred = viewpoint_probs.argmax(1, keepdim=True)
    # NOTE(review): the viewpoint head has 4 outputs, so index 6 can never be
    # predicted - confirm which class ids are meant to count as frontal.
    if top_pred.item() not in [0, 6]:
        img = np.array(Image.open("fake.jpg"))
        text_output = "Sorry! I am not able to recognize a frontal view of a car in this image. Please upload a new photo!"
        return text_output, img

    model.load_state_dict(torch.load("model_modernity.pt", map_location=torch.device("cpu")))
    model.eval()
    with torch.no_grad():
        year_probs = model_Softmax(model(batch))
    model_year = year_probs.argmax(1, keepdim=True).item()
    # Modernity score: probability-weighted mean of the class indices.
    # torch.arange(n) yields 0..n-1, replacing the deprecated
    # torch.range(0, 14).
    class_positions = torch.arange(
        year_probs.shape[1], dtype=year_probs.dtype, device=year_probs.device
    )
    score_t = torch.mul(class_positions, torch.reshape(year_probs, (-1,))).sum().item()

    model_typicality.load_state_dict(torch.load("model_typicality.pt", map_location=torch.device("cpu")))
    model_typicality.eval()
    # One forward pass yields both the class prediction and, through the
    # hook, the avgpool activation.
    hook_handle = model_typicality.avgpool.register_forward_hook(getActivation("avgpool"))
    try:
        with torch.no_grad():
            part_probs = model_Softmax(model_typicality(batch))
    finally:
        hook_handle.remove()
    model_part = part_probs.argmax(1, keepdim=True).item()
    act_pool_t = activation["avgpool"]

    model_year = _label_for_index(years_dict, model_year)
    model_part = _label_for_index(typicality_dict, model_part)

    modernity_text = "Modernity score: " + str(round(score_t, 2))

    # classes_dict has no entry for some combinations (Convertible_2018 and
    # Hatchback_2018), so the lookup must not assume the key exists.
    true_idx = classes_dict.get(model_part + "_" + model_year)
    if true_idx is None:
        return modernity_text + " | Typicality score: n/a", res

    # Load onto ``device`` so the reference and the activation can be compared.
    morph_avg = torch.load("morph.pt", map_location=device)
    cos_t = cos(morph_avg[true_idx], act_pool_t).item()

    # Build one string; the previous comma-separated form produced a tuple.
    txt = modernity_text + " | Typicality score: " + str(round(cos_t, 2))
    return txt, res
# Launching the app ---------------------------------------------------------
# Output slots in the order predict() returns them: message text, then image.
_output_components = ["text", gr.Image(type="pil")]

interface = gr.Interface(
    fn=predict,
    inputs="image",
    outputs=_output_components,
    title="Let's classify your car!",
)
interface.launch()