import torch
import gradio as gr
from PIL import Image
from huggingface_hub import hf_hub_download
import importlib.util
from torchvision import transforms
# Select device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Download the model code and load it as a module
class_path = hf_hub_download(repo_id="PerceptCLIP/PerceptCLIP_Emotions", filename="modeling.py")
spec = importlib.util.spec_from_file_location("modeling", class_path)
modeling = importlib.util.module_from_spec(spec)
spec.loader.exec_module(modeling)
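# hf_hub_download caches modeling.py locally and returns its path; importlib then
# executes that file as an in-memory module, so no package installation is needed.
# The resulting `modeling` module exposes the model class used below.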
# Initialize the model (clip_lora_model is defined in the downloaded modeling.py)
model = modeling.clip_lora_model().to(device)
# Load pretrained weights
model_path = hf_hub_download(repo_id="PerceptCLIP/PerceptCLIP_Emotions", filename="perceptCLIP_Emotions.pth")
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()
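# eval() switches off dropout and puts normalization layers in inference mode;
# together with torch.no_grad() below, this keeps predictions deterministic and cheap.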
# Emotion label mapping
idx2label = {
    0: "amusement",
    1: "awe",
    2: "contentment",
    3: "excitement",
    4: "anger",
    5: "disgust",
    6: "fear",
    7: "sadness",
}
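# Note: the index order above must match the class order used when the released
# checkpoint was trained; reordering the labels would silently mislabel predictions.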
# Emoji mapping
emotion_emoji = {
    "amusement": "😂",
    "awe": "😲",
    "contentment": "😊",
    "excitement": "😃",
    "anger": "😠",
    "disgust": "🤢",
    "fear": "😱",
    "sadness": "😢",
}
# Image preprocessing
def emo_preprocess(image):
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # Standard CLIP image normalization statistics
        transforms.Normalize(mean=(0.4814, 0.4578, 0.4082), std=(0.2686, 0.2613, 0.2758)),
    ])
    return transform(image).unsqueeze(0).to(device)
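# Example (the file name "photo.jpg" is illustrative only):
#   x = emo_preprocess(Image.open("photo.jpg").convert("RGB"))
#   x.shape  # torch.Size([1, 3, 224, 224]), already on `device`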
# Inference function
def predict_emotion(image):
    # Gradio passes a PIL Image; also accept a file path for direct calls
    if isinstance(image, Image.Image):
        img = image.convert("RGB")
    else:
        img = Image.open(image).convert("RGB")

    img = emo_preprocess(img)
    with torch.no_grad():
        outputs = model(img)
        predicted = outputs.argmax(1).item()

    emotion = idx2label[predicted]
    emoji = emotion_emoji.get(emotion, "❓")  # fallback if no emoji is mapped
    return f"{emotion} {emoji}"
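# Direct call example, bypassing the UI (the file name below is illustrative):
#   print(predict_emotion("photo.jpg"))  # -> e.g. "awe 😲"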
# Create Gradio interface
iface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs=gr.Textbox(label="Emotion + Emoji"),
    title="PerceptCLIP-Emotions",
    description="This model predicts the emotion evoked by an image and returns the corresponding emoji along with the emotion name.",
)
if __name__ == "__main__":
    iface.launch()
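# launch() serves the app locally (http://127.0.0.1:7860 by default); on Hugging Face
# Spaces the same call is picked up automatically, so no extra arguments are needed.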