"""Gradio demo that classifies a dress image and reports a similarity score.

Pipeline: the uploaded image is padded to a black square, embedded with the
DINOv2 ViT-S/14 backbone, classified by a small MLP head (``Dress_Class``),
and compared (cosine similarity) against the stored feature vector of the
predicted category.
"""
# Import packages
import pickle

import gradio as gr
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image, ImageDraw, ImageFont
from sklearn.metrics.pairwise import cosine_similarity
from torchcam.methods import SmoothGradCAMpp
from torchcam.utils import overlay_mask
from torchvision import models, transforms
from torchvision.transforms.functional import to_pil_image

dinov2_vits14 = torch.hub.load("facebookresearch/dinov2", "dinov2_vits14")

# Inference runs on CPU only.
device = torch.device("cpu")
dinov2_vits14.to(device)
# Inference only: make sure the backbone is not left in training mode.
dinov2_vits14.eval()

# Define the transformations: convert to tensor, resize, and normalize
transform_image = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(224),
        transforms.Normalize([0.5], [0.5]),
    ]
)


class Dress_Class(nn.Module):
    """Fully connected classifier head applied to an image embedding.

    Args:
        num_dim: Size of the input feature vector.
        hidden_layers: Iterable with the width of each hidden layer, in order.
        num_classes: Number of output classes.
    """

    def __init__(self, num_dim, hidden_layers, num_classes):
        super().__init__()
        layers = []
        current_num = num_dim
        for num_neurons in hidden_layers:
            layers.append(nn.Linear(current_num, num_neurons))
            layers.append(nn.ReLU())
            current_num = num_neurons
        layers.append(nn.Linear(current_num, num_classes))
        # The head outputs class probabilities, not logits.
        layers.append(nn.Softmax(dim=1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return per-class probabilities for a batch ``x`` of feature rows."""
        return self.model(x)


# map_location keeps loading working even if the checkpoint was saved on GPU.
# NOTE(review): assumes "best_mo.pth" holds a state dict for Dress_Class --
# confirm against the training script.
best = torch.load("best_mo.pth", map_location=device)
hidden_layers = [192, 187, 331, 186, 256]
model = Dress_Class(384, hidden_layers, 10)
# Bug fix: `model.state_dict(best)` does not load anything into the model;
# `load_state_dict` is the call that copies the saved weights in.
model.load_state_dict(best)
model.to(device)
model.eval()

# NOTE(security): pickle can execute arbitrary code on load; only ship a
# trusted 'saved_dress_morph.pkl' alongside this app.
with open('saved_dress_morph.pkl', 'rb') as f:
    # Indexed below by predicted category name; presumably each value is that
    # category's mean embedding -- TODO confirm.
    loaded_dict = pickle.load(f)


def detect(image):
    """Classify an uploaded dress image and score it against its category.

    Args:
        image: The uploaded picture, either a NumPy array (what
            ``gr.Image(type="numpy")`` passes in) or a ``PIL.Image.Image``.

    Returns:
        A tuple ``(message, score)`` where ``message`` is the text
        ``"Predicted Dress Category: <name>"`` and ``score`` is the cosine
        similarity, rounded to two decimals, between the image embedding and
        the stored feature vector of the predicted category.

    Raises:
        ValueError: If no image was provided.
    """
    if image is None:
        raise ValueError("No image provided.")

    # Gradio delivers a NumPy array; PIL is needed for the padding step below.
    if isinstance(image, Image.Image):
        pil_image = image.convert("RGB")
    else:
        pil_image = Image.fromarray(np.asarray(image).astype(np.uint8)).convert("RGB")

    # Pad to a black square (image anchored at the top-left corner) so the
    # resize to 224 yields a square input without distorting the aspect ratio.
    size = max(pil_image.size)
    new_im = Image.new('RGB', (size, size), color=0)
    new_im.paste(pil_image)

    with torch.no_grad():
        # Apply transformations to the image and move it to the appropriate device
        image_tensor = transform_image(new_im).to(device)
        # Extract features once and reuse them for both the classifier and
        # the similarity score (the backbone forward pass is the costly step).
        dino_embedding = dinov2_vits14(image_tensor.unsqueeze(0)).cpu()
        outputs = model(dino_embedding)

    dino_numpy = dino_embedding.numpy()
    pred_dress_cat = torch.argmax(outputs, dim=1).item()
    # NOTE(review): `dress_dict` (class index -> category name) is not defined
    # anywhere in the visible file; it must be provided for this lookup to work.
    pred_dress = dress_dict[pred_dress_cat]
    mean_features = loaded_dict[pred_dress]
    pred_dress_s = f"Predicted Dress Category: {pred_dress}"

    cosine_sim = cosine_similarity(
        dino_numpy.reshape(1, -1), mean_features.reshape(1, -1)
    ).item()
    cosin = round(float(cosine_sim), 2)
    return pred_dress_s, cosin


demo = gr.Interface(
    fn=detect,
    inputs=gr.Image(type="numpy", label="Upload an image"),
    outputs=[
        gr.Textbox(label="Predictions"),
        gr.Number(label="Typicality Score"),
    ],
    title='Dress Classification',
)
demo.launch()