# Importing all necessary libraries ------------------------------------------

from PIL import Image
import gradio as gr
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import models, transforms
import sys, os, distutils.core

import detectron2
from detectron2 import model_zoo
from detectron2.utils.logger import setup_logger
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg

# Model setup ---------------------------------------------------------------

sys.path.insert(0, os.path.abspath("./detectron2"))
setup_logger()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_classes = 15
criterion = nn.CrossEntropyLoss()

# Main (modernity) model: frozen ResNet-18 backbone, 15-way design-year head
model = models.resnet18(pretrained = True)
for param in model.parameters():
    param.requires_grad = False  # freeze the pretrained backbone
n_features = model.fc.in_features
model.fc = nn.Linear(n_features, n_classes)
model = model.to(device)

# Viewpoint model: frozen ResNet-18 backbone, 4-way viewpoint head
model_viewpoint = models.resnet18(pretrained = True)
for param in model_viewpoint.parameters():
    param.requires_grad = False
n_features = model_viewpoint.fc.in_features
model_viewpoint.fc = nn.Linear(n_features, 4)
model_viewpoint = model_viewpoint.to(device)

# Typicality model: frozen ResNet-18 backbone, 5-way body-type head
model_typicality = models.resnet18(pretrained = True)
for param in model_typicality.parameters():
    param.requires_grad = False
n_features = model_typicality.fc.in_features
model_typicality.fc = nn.Linear(n_features, 5)
model_typicality = model_typicality.to(device)

model_Softmax = nn.Softmax(dim = 1)
cos = nn.CosineSimilarity()

# Transformations applied to the test images (ImageNet normalization statistics)
test_transforms = transforms.Compose(
    [transforms.Resize(size = (224, 224)),
     transforms.ToTensor(),
     transforms.Normalize([0.485, 0.456, 0.406],
                          [0.229, 0.224, 0.225])
     ]
)

# Helper functions ----------------------------------------------------------

def accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim = True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

activation = {}

def getActivation(name):
    # Forward hook that stores a layer's output in the `activation` dict
    def hook(model_typicality, input, output):
        activation[name] = output.detach()
    return hook

def save_image_locally(image_array_FN, path_FN = "fake.jpg"):
    image_array_FN = image_array_FN.astype(np.uint8)
    data = Image.fromarray(image_array_FN)
    data.save(path_FN)
    return None

# Prediction ----------------------------------------------------------------

typicality_dict = {"Convertible": 0, "Hatchback": 1, "MPV": 2, "SUV": 3, "Saloon": 4}

classes_dict = {
    "Convertible_2000": 0, "Convertible_2003": 1, "Convertible_2006": 2,
    "Convertible_2007": 3, "Convertible_2008": 4, "Convertible_2009": 5,
    "Convertible_2010": 6, "Convertible_2011": 7, "Convertible_2012": 8,
    "Convertible_2013": 9, "Convertible_2014": 10, "Convertible_2015": 11,
    "Convertible_2016": 12, "Convertible_2017": 13,
    "Hatchback_2000": 14, "Hatchback_2003": 15, "Hatchback_2006": 16,
    "Hatchback_2007": 17, "Hatchback_2008": 18, "Hatchback_2009": 19,
    "Hatchback_2010": 20, "Hatchback_2011": 21, "Hatchback_2012": 22,
    "Hatchback_2013": 23, "Hatchback_2014": 24, "Hatchback_2015": 25,
    "Hatchback_2016": 26, "Hatchback_2017": 27,
    "MPV_2000": 28, "MPV_2003": 29, "MPV_2006": 30, "MPV_2007": 31,
    "MPV_2008": 32, "MPV_2009": 33, "MPV_2010": 34, "MPV_2011": 35,
    "MPV_2012": 36, "MPV_2013": 37, "MPV_2014": 38, "MPV_2015": 39,
    "MPV_2016": 40, "MPV_2017": 41, "MPV_2018": 42,
    "SUV_2000": 43, "SUV_2003": 44, "SUV_2006": 45, "SUV_2007": 46,
    "SUV_2008": 47, "SUV_2009": 48, "SUV_2010": 49, "SUV_2011": 50,
    "SUV_2012": 51, "SUV_2013": 52, "SUV_2014": 53,
    "SUV_2015": 54, "SUV_2016": 55, "SUV_2017": 56, "SUV_2018": 57,
    "Saloon_2000": 58, "Saloon_2003": 59, "Saloon_2006": 60, "Saloon_2007": 61,
    "Saloon_2008": 62, "Saloon_2009": 63, "Saloon_2010": 64, "Saloon_2011": 65,
    "Saloon_2012": 66, "Saloon_2013": 67, "Saloon_2014": 68, "Saloon_2015": 69,
    "Saloon_2016": 70, "Saloon_2017": 71, "Saloon_2018": 72}

years_dict = {"2000": 0, "2003": 1, "2006": 2, "2007": 3, "2008": 4, "2009": 5,
              "2010": 6, "2011": 7, "2012": 8, "2013": 9, "2014": 10, "2015": 11,
              "2016": 12, "2017": 13, "2018": 14}

# Parse the local detectron2 setup script (source install)
dist = distutils.core.run_setup("./detectron2/setup.py")

# Detectron2 Mask R-CNN used to segment the car from the background
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)

def predict(img_F):
    target_class = 2  # COCO class index for "car"

    img = Image.fromarray(img_F.astype("uint8"), "RGB")
    img = np.array(img)

    # Instance segmentation: keep only the detections that are cars
    outputs = predictor(img)
    masks = outputs["instances"].pred_masks
    pred_classes = outputs["instances"].pred_classes.tolist()
    pred_boxes = list(outputs["instances"].pred_boxes)
    areas = torch.sum(torch.flatten(masks, start_dim = 1), dim = 1).tolist()

    total_area = []
    car_area = []
    for idx in range(len(pred_classes)):
        if pred_classes[idx] == target_class:
            total_area.append(areas[idx])
            car_area.append(idx)

    if len(car_area) == 0:
        img = Image.open("init.jpg")
        img = np.array(img)
        text_output = "Sorry! I am not able to recognize a car in this image. Please upload a new photo!"
        return text_output, img

    # Select the largest detected car and white out the background using its mask
    local_idx = total_area.index(max(total_area))
    global_idx = car_area[local_idx]
    unsq = outputs["instances"].pred_masks[global_idx].unsqueeze(-1).to("cpu")
    mult = torch.tensor(img) * unsq
    unsq = unsq.int()
    unsq[unsq == 0] = 255
    unsq[unsq == 1] = 0
    mult = mult + unsq
    res = mult.numpy().astype(np.uint8)
    save_image_locally(res, path_FN = "fake.jpg")

    img_pred = Image.open("fake.jpg")
    img_pred = test_transforms(img_pred)

    # Viewpoint check: only frontal views are scored
    model_viewpoint.load_state_dict(torch.load("model_viewpoint.pt", map_location = torch.device("cpu")))
    model_viewpoint.eval()
    y_pred = model_viewpoint(img_pred.unsqueeze(0).to(device))
    y_pred = model_Softmax(y_pred)
    top_pred = y_pred.argmax(1, keepdim = True)
    if top_pred.item() not in [0, 6]:
        img = Image.open("fake.jpg")
        img = np.array(img)
        text_output = "Sorry! I am not able to recognize a frontal view of a car in this image. Please upload a new photo!"
        return text_output, img

    # Modernity score
    model.load_state_dict(torch.load("model_modernity.pt", map_location = torch.device("cpu")))
    model.eval()
    score_t = model(img_pred.unsqueeze(0).to(device))
    score_t = model_Softmax(score_t)
    model_year = score_t.argmax(1, keepdim = True).item()
    score_t = torch.mul(torch.arange(0, 15).to(device), torch.reshape(score_t, (-1, ))).sum().item()

    # Typicality score
    model_typicality.load_state_dict(torch.load("model_typicality.pt", map_location = torch.device("cpu")))
    model_typicality.eval()
    model_part = model_typicality(img_pred.unsqueeze(0).to(device))
    model_part = model_Softmax(model_part)
    model_part = model_part.argmax(1, keepdim = True).item()

    model_avg = pd.DataFrame()
    h1 = model_typicality.avgpool.register_forward_hook(getActivation("avgpool"))
    out = model_typicality(img_pred.unsqueeze(0).to(device))
    act_pool_t = activation["avgpool"]
    h1.remove()

    model_year = list(years_dict.keys())[list(years_dict.values()).index(model_year)]
    model_part = list(typicality_dict.keys())[list(typicality_dict.values()).index(model_part)]
    true_idx = classes_dict[model_part + "_" + model_year]

    morph_avg = torch.load("morph.pt", map_location = device)
    cos_t = cos(morph_avg[true_idx], act_pool_t).item()

    txt = f"Modernity score: {round(score_t, 2)} | Typicality score: {round(cos_t, 2)}"
    return txt, res

# Launching the app ---------------------------------------------------------

interface = gr.Interface(
    predict,
    inputs = "image",
    outputs = ["text", gr.Image(type = "pil")],
    title = "Let's classify your car!")

interface.launch()
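
# Quick sanity check (a minimal sketch; "example_car.jpg" is a hypothetical local
# photo): predict() can also be called directly, without the Gradio UI, once the
# blocking interface.launch() call above has returned:
#
#     text, img = predict(np.array(Image.open("example_car.jpg").convert("RGB")))
#     print(text)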