from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import gradio as gr
import torch
# Initialize the CLIP model and processor
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
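# This app only runs inference, so put the model in eval mode
# (disables dropout and other train-time behavior).
model.eval()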
def image_similarity(image: Image.Image, positive_prompt: str, negative_prompt: str):
    # Encode the image against both prompts in a single forward pass
    inputs = processor(
        text=[positive_prompt, negative_prompt],
        images=image,
        return_tensors="pt",
        padding=True
    )
    # No gradients are needed for inference
    with torch.no_grad():
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    probs = logits_per_image.softmax(dim=1)  # softmax over the two prompts
    # The positive prompt "wins" if its probability exceeds the negative one's
    result = probs[0][0] > probs[0][1]
    return bool(result), f"Probabilities: Positive {probs[0][0]:.4f}, Negative {probs[0][1]:.4f}"
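# Minimal sketch of a direct (non-UI) call, assuming a local file
# "example.jpg" exists (hypothetical path, not part of this Space):
#
#   img = Image.open("example.jpg")
#   is_positive, details = image_similarity(img, "a smiling face", "a sad face")
#   print(is_positive, details)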
interface = gr.Interface(
    fn=image_similarity,
    inputs=[
        gr.components.Image(type="pil"),
        gr.components.Textbox(label="Enter positive prompt, e.g. 'a smiling face'"),
        gr.components.Textbox(label="Enter negative prompt, e.g. 'a sad face'")
    ],
    outputs=[
        gr.components.Textbox(label="Result"),
        gr.components.Textbox(label="Probabilities")
    ],
    title="Engagify's Image Action Detection",
    description="[Author: Ibrahim Hasani] This method uses CLIP-ViT [Version: BASE-PATCH-16] to determine whether an action is being performed in an image (binary classifier). It contrasts an action against a negative label, so ensure both prompts accurately describe the intended detection.",
    live=False,
    theme=gr.themes.Monochrome(),
)
interface.launch()
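# When running locally rather than on Spaces, a temporary public URL can be
# requested instead via Gradio's standard share flag:
#
#   interface.launch(share=True)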