Spaces:
Sleeping
Sleeping
# Import packages | |
import pickle | |
import torch | |
import torch.nn as nn | |
from torchvision import transforms, models | |
from PIL import Image | |
from PIL import Image, ImageDraw, ImageFont | |
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
import torch.nn.functional as F | |
from torchcam.methods import SmoothGradCAMpp | |
from torchcam.utils import overlay_mask | |
from torchvision.transforms.functional import to_pil_image | |
from sklearn.metrics.pairwise import cosine_similarity | |
# Load the pretrained DINOv2 "dinov2_vits14" backbone through torch.hub; it is
# used in detect() as the feature extractor.
dinov2_vits14 = torch.hub.load("facebookresearch/dinov2", "dinov2_vits14")

# Set the device to GPU if available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dinov2_vits14.to(device)
# The backbone is only used for inference, so switch it to evaluation mode.
dinov2_vits14.eval()

# Define the transformations: convert to tensor, resize, and normalize
transform_image = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(224),
        transforms.Normalize([0.5], [0.5]),
    ]
)
class Dress_Class(nn.Module):
    """Fully connected classifier whose output passes through a softmax.

    The network is one ``Linear`` + ``ReLU`` pair per entry of
    ``hidden_layers``, followed by a ``Linear`` projection to
    ``num_classes`` and a softmax over dimension 1.

    Args:
        num_dim: Number of input features.
        hidden_layers: Width of each hidden layer, in order.
        num_classes: Number of output classes.
    """

    def __init__(self, num_dim, hidden_layers, num_classes):
        super().__init__()
        widths = [num_dim, *hidden_layers]
        stack = []
        # Consecutive width pairs give the (in, out) size of each hidden layer.
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        stack += [nn.Linear(widths[-1], num_classes), nn.Softmax(dim=1)]
        # The attribute name determines the state_dict key prefix ("model.<i>.*").
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the softmax output."""
        return self.model(x)
# Load the trained classifier checkpoint. The classifier itself is never moved
# off the CPU, so map the stored tensors there regardless of where they were
# saved.
# NOTE(review): assumes best_mo.pth contains a state_dict (not a pickled
# module) -- confirm against the training script.
best = torch.load("best_mo.pth", map_location="cpu")

# Architecture of the classifier: input size, hidden widths, number of classes.
hidden_layers = [192, 187, 331, 186, 256]
model = Dress_Class(384, hidden_layers, 10)

# load_state_dict() copies the checkpoint into the network. state_dict() only
# exports the current parameters, so calling it here would leave the model
# with its random initial weights.
model.load_state_dict(best)
model.eval()

# Mapping indexed by predicted category in detect(); each value is the feature
# vector the image embedding is compared against.
# NOTE: pickle.load can execute arbitrary code -- only load a trusted file.
with open("saved_dress_morph.pkl", "rb") as f:
    loaded_dict = pickle.load(f)
def detect(image):
    """Classify a dress image and score its similarity to the category mean.

    The image is pasted onto a black square canvas, embedded with the DINOv2
    backbone and classified by ``model``. The score is the cosine similarity
    between the image embedding and the stored feature vector of the
    predicted category in ``loaded_dict``.

    Args:
        image: The uploaded picture, either a NumPy array (what
            ``gr.Image(type="numpy")`` delivers) or a ``PIL.Image.Image``.

    Returns:
        A ``(label, score)`` tuple: ``label`` is the string
        ``"Predicted Dress Category: <name>"`` and ``score`` is the cosine
        similarity rounded to two decimals.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image.")

    # Gradio passes an ndarray, but the square padding below needs a PIL image.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype(np.uint8))
    image = image.convert("RGB")

    # Create a squared black image and paste the picture at the top-left.
    size = max(image.size)
    new_im = Image.new("RGB", (size, size), color=0)
    new_im.paste(image)

    with torch.no_grad():
        # Apply transformations to the image and move it to the appropriate device
        image_tensor = transform_image(new_im).to(device)
        # Extract features once; the same embedding feeds the classifier and
        # the similarity score.
        dino_embedding = dinov2_vits14(image_tensor.unsqueeze(0)).cpu()
        outputs = model(dino_embedding)

    dino_numpy = dino_embedding.numpy()
    pred_dress_cat = int(torch.argmax(outputs, dim=1).item())

    # NOTE(review): dress_dict (class index -> category name) is not defined in
    # the visible part of this file; it must exist at module level for this
    # lookup to work.
    pred_dress = dress_dict[pred_dress_cat]
    mean_features = loaded_dict[pred_dress]
    pred_dress_s = f"Predicted Dress Category: {pred_dress}"

    cosine_sim = cosine_similarity(
        dino_numpy.reshape(1, -1), mean_features.reshape(1, -1)
    ).item()
    cosin = round(float(cosine_sim), 2)
    return pred_dress_s, cosin
# Build the Gradio UI: one image upload in, a text prediction and a numeric
# score out.
image_input = gr.Image(type="numpy", label="Upload an image")
prediction_output = gr.Textbox(label="Predictions")
typicality_output = gr.Number(label="Typicality Score")

demo = gr.Interface(
    fn=detect,
    inputs=image_input,
    outputs=[prediction_output, typicality_output],
    title="Dress Classification",
)

# Start serving the app.
demo.launch()