Spaces:

lneduchal
/

DS405B_space

Runtime error

File size: 7,776 Bytes


# Importing all necessary libraries ------------------------------------------

from PIL import Image
import gradio as gr

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torchvision import models, transforms

import sys, os, distutils.core

import detectron2
from detectron2 import model_zoo
from detectron2.utils.logger import setup_logger
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg


# Model setup ---------------------------------------------------------------

# Make the local Detectron2 checkout importable and switch on its logger.
# NOTE(review): `detectron2` is already imported at the top of the file, so this
# path entry only affects imports that happen after this line.
sys.path.insert(0, os.path.abspath("./detectron2"))
setup_logger()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of outputs of the main model's classification head
n_classes = 15
criterion = nn.CrossEntropyLoss()


def _build_resnet18_classifier(n_outputs):
    """Return a ResNet-18 with a frozen backbone and a new `n_outputs`-way head.

    The pretrained weights are loaded, every existing parameter is excluded
    from gradient computation, and the final fully connected layer is replaced
    by a freshly initialised linear layer (which stays trainable). The network
    is moved to `device` before it is returned.
    """
    net = models.resnet18(pretrained = True)
    for param in net.parameters():
        # The attribute is `requires_grad`; the former spelling `require_grad`
        # only attached an unused attribute and froze nothing.
        param.requires_grad = False
    net.fc = nn.Linear(net.fc.in_features, n_outputs)
    return net.to(device)


# Main model
model = _build_resnet18_classifier(n_classes)

# Viewpoint model
model_viewpoint = _build_resnet18_classifier(4)

# Typicality model
model_typicality = _build_resnet18_classifier(5)

# Kept as a module-level name because the original script exposed it
n_features = model_typicality.fc.in_features

model_Softmax = nn.Softmax(dim = 1)
cos = nn.CosineSimilarity()

# Preprocessing applied to an image before it is fed to the classifiers:
# resize to 224x224, convert to a tensor, then normalise each channel.
test_transforms = transforms.Compose([
    transforms.Resize(size = (224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225],
    ),
])


# Helper functions ----------------------------------------------------------

def accuracy(y_pred, y):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        y_pred: score tensor; the arg-max is taken along dimension 1.
        y: label tensor with one entry per row of `y_pred`.

    Returns:
        A scalar float tensor: number of matches divided by `y.shape[0]`.
    """
    predicted = y_pred.argmax(dim = 1, keepdim = True)
    targets = y.view_as(predicted)
    n_correct = (predicted == targets).sum()
    return n_correct.float() / y.shape[0]

# Module-level store filled by the hooks created below: name -> captured output
activation = {}


def getActivation(name):
    """Create a forward hook that records a layer's output under `name`.

    The returned callable has the (module, inputs, output) signature of a
    forward hook and writes a detached copy of `output` into the
    module-level `activation` dictionary.
    """

    def hook(module, inputs, output):
        # Detach so the stored tensor is cut off from the autograd graph
        activation[name] = output.detach()

    return hook

def save_image_locally(image_array_FN, path_FN = "fake.jpg"):
    """Write an image array to disk.

    The array is cast to unsigned 8-bit integers, wrapped in a PIL image and
    saved to `path_FN`; the file format is chosen by PIL from the path.

    Args:
        image_array_FN: array holding the pixel values.
        path_FN: destination file path.

    Returns:
        None.
    """
    Image.fromarray(image_array_FN.astype(np.uint8)).save(path_FN)


# Prediction ----------------------------------------------------------------

# Body-type label -> class index (used to decode the typicality model's output)
typicality_dict = {
    body: idx
    for idx, body in enumerate(("Convertible", "Hatchback", "MPV", "SUV", "Saloon"))
}

# Model-year label -> class index (used to decode the main model's output)
years_dict = {
    year: idx
    for idx, year in enumerate((
        "2000", "2003", "2006", "2007", "2008", "2009", "2010", "2011",
        "2012", "2013", "2014", "2015", "2016", "2017", "2018",
    ))
}

# "<body>_<year>" -> running index, ordered by body type and then by year.
# Convertible and Hatchback have no 2018 entry, so they contribute 14 labels
# each while the other three body types contribute 15 (73 labels in total).
_body_types_without_2018 = ("Convertible", "Hatchback")
_class_labels = [
    body + "_" + year
    for body in typicality_dict
    for year in years_dict
    if not (year == "2018" and body in _body_types_without_2018)
]
classes_dict = {label: idx for idx, label in enumerate(_class_labels)}


# NOTE(review): the object returned here is not used anywhere in the visible
# code — confirm whether running the Detectron2 setup script is still needed.
dist = distutils.core.run_setup("./detectron2/setup.py")

# Instance-segmentation predictor: Mask R-CNN (R50-FPN, 3x) from the model zoo
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# Detectron2 config keys are upper-case; the former lower-case spellings
# (`cfg.model...`) do not exist on the config node and raise AttributeError.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # drop detections scored below 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)

def predict(img_F):
    """Segment the largest car in an image and score its modernity and typicality.

    Steps:
      1. Run the Detectron2 predictor and keep the car instance with the
         largest mask; every pixel outside that mask is painted white.
      2. Save the segmented image to "fake.jpg", reload it and apply
         `test_transforms`.
      3. Reject the image when the viewpoint model's class is not accepted.
      4. Compute the modernity score as the probability-weighted mean of the
         main model's class indices.
      5. Compute the typicality score as the cosine similarity between the
         typicality model's average-pool activation and the stored reference
         selected by predicted body type and year.

    Args:
        img_F: image array supplied by the Gradio "image" input
            (presumably H x W x 3, RGB — confirm against the Gradio version).

    Returns:
        A `(text, image)` tuple: a message or score string, and an image array
        (the segmented car, or the contents of "init.jpg" when no car is found).

    Raises:
        KeyError: if the predicted body type / year pair is missing from
            `classes_dict` (see the NOTE near the lookup).
    """
    target_class = 2  # class id treated as "car" among the detector's outputs

    img = img_F.astype(np.uint8)

    # DefaultPredictor is documented to take BGR input, while this function
    # works with RGB arrays, so the channel order is reversed for detection only.
    outputs = predictor(np.ascontiguousarray(img[:, :, ::-1]))
    masks = outputs["instances"].pred_masks
    pred_classes = outputs["instances"].pred_classes.tolist()

    car_indices = [idx for idx, cls in enumerate(pred_classes) if cls == target_class]
    if not car_indices:
        placeholder = np.array(Image.open("init.jpg"))
        text_output = "Sorry! I am not able to recognize a car in this image. Please upload a new photo!"
        return text_output, placeholder

    # Keep the car whose mask covers the most pixels
    areas = torch.sum(torch.flatten(masks, start_dim = 1), dim = 1).tolist()
    global_idx = max(car_indices, key = lambda idx: areas[idx])

    # Car pixels keep their colour, everything else becomes white (255)
    car_mask = masks[global_idx].to("cpu").numpy()
    res = np.where(car_mask[:, :, None], img, 255).astype(np.uint8)

    # The classifiers see the image after a round trip through "fake.jpg"
    save_image_locally(res, path_FN = "fake.jpg")
    img_pred = test_transforms(Image.open("fake.jpg"))
    # The models live on `device`, so the input batch must be moved there too
    batch = img_pred.unsqueeze(0).to(device)

    # --- Viewpoint check -----------------------------------------------------
    model_viewpoint.load_state_dict(torch.load("model_viewpoint.pt", map_location = torch.device("cpu")))
    model_viewpoint.eval()
    with torch.no_grad():
        viewpoint = model_viewpoint(batch).argmax(dim = 1).item()

    # NOTE(review): the viewpoint head has only 4 outputs, so class 6 can never
    # be predicted — confirm which class ids count as a frontal view.
    if viewpoint not in [0, 6]:
        text_output = "Sorry! I am not able to recognize a frontal view of a car in this image. Please upload a new photo!"
        return text_output, res

    # --- Modernity -----------------------------------------------------------
    model.load_state_dict(torch.load("model_modernity.pt", map_location = torch.device("cpu")))
    model.eval()
    with torch.no_grad():
        year_probs = model_Softmax(model(batch))
    model_year = year_probs.argmax(dim = 1).item()
    # Expected class index under the predicted distribution
    class_indices = torch.arange(
        year_probs.shape[1], dtype = year_probs.dtype, device = year_probs.device
    )
    score_t = (class_indices * year_probs.reshape(-1)).sum().item()

    # --- Typicality ----------------------------------------------------------
    model_typicality.load_state_dict(torch.load("model_typicality.pt", map_location = torch.device("cpu")))
    model_typicality.eval()
    # One forward pass yields both the logits and the avgpool activation
    h1 = model_typicality.avgpool.register_forward_hook(getActivation("avgpool"))
    try:
        with torch.no_grad():
            type_logits = model_typicality(batch)
    finally:
        h1.remove()
    act_pool_t = activation["avgpool"]
    model_part = type_logits.argmax(dim = 1).item()

    # Translate class indices back into their labels
    year_by_index = {idx: year for year, idx in years_dict.items()}
    type_by_index = {idx: name for name, idx in typicality_dict.items()}
    # NOTE(review): `classes_dict` has no "Convertible_2018" / "Hatchback_2018"
    # entry, so that prediction pair raises KeyError — decide on a fallback.
    true_idx = classes_dict[type_by_index[model_part] + "_" + year_by_index[model_year]]

    # Load the reference data onto the same device as the activation
    morph_avg = torch.load("morph.pt", map_location = device)
    cos_t = cos(morph_avg[true_idx], act_pool_t).item()

    # Return a plain string, like the two error branches above
    txt = f"Modernity score: {round(score_t, 2)} | Typicality score: {round(cos_t, 2)}"

    return txt, res


# Launching the app ---------------------------------------------------------

# Gradio UI: one image in; a text message and an image out
interface = gr.Interface(
    fn = predict,
    inputs = "image",
    outputs = ["text", gr.Image(type = "pil")],
    title = "Let's classify your car!",
)
interface.launch()