import torch
import gradio as gr
from PIL import Image
from huggingface_hub import hf_hub_download
import importlib.util
from torchvision import transforms
# Select device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Download the model code and load it as a module
class_path = hf_hub_download(repo_id="PerceptCLIP/PerceptCLIP_Emotions", filename="modeling.py")
spec = importlib.util.spec_from_file_location("modeling", class_path)
modeling = importlib.util.module_from_spec(spec)
spec.loader.exec_module(modeling)
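# hf_hub_download caches modeling.py locally and returns its path; importlib then
# executes that file as an in-memory module, so no package installation is needed.
# The resulting `modeling` module exposes the model class used below.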
# Initialize the model (clip_lora_model is defined in the downloaded modeling.py)
model = modeling.clip_lora_model().to(device)
# Load pretrained weights
model_path = hf_hub_download(repo_id="PerceptCLIP/PerceptCLIP_Emotions", filename="perceptCLIP_Emotions.pth")
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()
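# eval() switches off dropout and puts normalization layers in inference mode;
# together with torch.no_grad() below, this keeps predictions deterministic and cheap.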
# Emotion label mapping
idx2label = {
    0: "amusement",
    1: "awe",
    2: "contentment",
    3: "excitement",
    4: "anger",
    5: "disgust",
    6: "fear",
    7: "sadness",
}
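# Note: the index order above must match the class order used when the released
# checkpoint was trained; reordering the labels would silently mislabel predictions.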
# Emoji mapping
emotion_emoji = {
    "amusement": "😂",
    "awe": "😲",
    "contentment": "😊",
    "excitement": "😃",
    "anger": "😠",
    "disgust": "🤢",
    "fear": "😱",
    "sadness": "😢",
}
# Image preprocessing
def emo_preprocess(image):
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # Standard CLIP image normalization statistics
        transforms.Normalize(mean=(0.4814, 0.4578, 0.4082), std=(0.2686, 0.2613, 0.2758)),
    ])
    return transform(image).unsqueeze(0).to(device)
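# Example (the file name "photo.jpg" is illustrative only):
#   x = emo_preprocess(Image.open("photo.jpg").convert("RGB"))
#   x.shape  # torch.Size([1, 3, 224, 224]), already on `device`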
# Inference function
def predict_emotion(image):
    # Gradio passes a PIL Image; also accept a file path for direct calls
    if isinstance(image, Image.Image):
        img = image.convert("RGB")
    else:
        img = Image.open(image).convert("RGB")

    img = emo_preprocess(img)
    with torch.no_grad():
        outputs = model(img)
        predicted = outputs.argmax(1).item()

    emotion = idx2label[predicted]
    emoji = emotion_emoji.get(emotion, "❓")  # fallback if no emoji is mapped
    return f"{emotion} {emoji}"
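# Direct call example, bypassing the UI (the file name below is illustrative):
#   print(predict_emotion("photo.jpg"))  # -> e.g. "awe 😲"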
# Create Gradio interface
iface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs=gr.Textbox(label="Emotion + Emoji"),
    title="PerceptCLIP-Emotions",
    description="This model predicts the emotion evoked by an image and returns the corresponding emoji along with the emotion name.",
)
if __name__ == "__main__":
    iface.launch()
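# launch() serves the app locally (http://127.0.0.1:7860 by default); on Hugging Face
# Spaces the same call is picked up automatically, so no extra arguments are needed.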