# Import packages
import pickle
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import numpy as np
import pandas as pd
import torch.nn.functional as F
from torchcam.methods import SmoothGradCAMpp
from torchcam.utils import overlay_mask
from torchvision.transforms.functional import to_pil_image
from sklearn.metrics.pairwise import cosine_similarity

dinov2_vits14 = torch.hub.load("facebookresearch/dinov2", "dinov2_vits14")
# Set the device to GPU if available, otherwise use CPU
device = torch.device("cpu")
dinov2_vits14.to(device)

# Define the transformations: convert to tensor, resize, and normalize
transform_image = transforms.Compose([transforms.ToTensor(), transforms.Resize(224), transforms.Normalize([0.5], [0.5])])

#model = torch.load("dress_model.pth")
class Dress_Class(nn.Module):
    def __init__(self, num_dim, hidden_layers, num_classes):
        super(Dress_Class, self).__init__()
        layers = []
        current_num = num_dim
        
        for num_neurons in hidden_layers:
            layers.append(nn.Linear(current_num, num_neurons))
            layers.append(nn.ReLU())
            current_num = num_neurons
        
        layers.append(nn.Linear(current_num, num_classes))
        layers.append(nn.Softmax(dim=1))
        self.model = nn.Sequential(*layers)
        
    def forward(self, x):
        x = self.model(x)
        return x
        
best = torch.load("best_mo.pth")       
hidden_layers = [192, 187, 331, 186, 256]
model = Dress_Class(384, hidden_layers, 10)
model.state_dict(best)
model.eval()

with open('saved_dress_morph.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)

def detect(image):
    
    size = max(image.size)
    new_im = Image.new('RGB', (size, size), color = 0) # Create a squared black image
    new_im.paste(image)
    new_im = Image.fromarray(new_im.astype(np.uint8))
    
    with torch.no_grad():
  
  # Apply transformations to the image and move it to the appropriate device
        image_tensor = transform_image(new_im).to(device)
  
  # Extract features using the DinoV2 model
        dino_embedding = dinov2_vits14(image_tensor.unsqueeze(0)).cpu()
        dino_numpy = dinov2_vits14(image_tensor.unsqueeze(0)).cpu().numpy()

    
    with torch.no_grad():
        outputs = model(dino_embedding)
        pred_dress_cat = round(torch.argmax(outputs, dim = 1).tolist()[0])

    pred_dress = dress_dict[pred_dress_cat]

    mean_features = loaded_dict[pred_dress]

    pred_dress_s = f"Predicted Dress Category:  {pred_dress}"

    cosine_sim = cosine_similarity(dino_numpy.reshape(1, -1), mean_features.reshape(1, -1)).item()
    cosin = round(float(cosin_sim), 2)

    return pred_dress_s, cosin

demo = gr.Interface(
fn=detect,
inputs=gr.Image(type="numpy", label="Upload an image"),
outputs=[gr.Textbox(label = "Predictions"),
         gr.Number(label="Typicality Score")],
        title='Dress Classification')
    
demo.launch()