Spaces:

Hasani
/

Binary-Image-Classification-In-The-Wild

Runtime error

App Files Files Community

Binary-Image-Classification-In-The-Wild / app.py

IbrahimHasani

Update app.py

dab8972 over 1 year ago

raw

history blame

1.87 kB

	from PIL import Image
	from transformers import CLIPProcessor, CLIPModel
	import gradio as gr

	# Load the CLIP preprocessing pipeline and the matching model weights once at
	# import time so every request served by the app reuses the same objects.
	_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
	processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
	model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)

	def image_similarity(image: Image.Image, positive_prompt: str, negative_prompts: "str | list[str]"):
	    """Check whether the positive prompt beats every negative prompt for an image.

	    The image and all prompts are scored with the module-level CLIP ``model``;
	    the per-image logits are turned into probabilities with a softmax over
	    the prompts (positive prompt first, then the negatives in order).

	    Args:
	        image: The picture to classify.
	        positive_prompt: Text describing the action/content to detect.
	        negative_prompts: Either a list of prompt strings, or a single string
	            of prompts separated by semicolons (what the UI text box sends).

	    Returns:
	        A ``(is_positive, message)`` tuple. ``is_positive`` is ``True`` only
	        when the positive prompt's probability is strictly greater than the
	        highest negative-prompt probability; ``message`` reports the positive
	        prompt's probability with four decimals.

	    Raises:
	        gr.Error: If no image was supplied or no usable negative prompt remains.
	    """
	    # Local import keeps the block self-contained; torch is already a hard
	    # dependency of the CLIP model used below.
	    import torch

	    if image is None:
	        raise gr.Error("Please provide an image.")

	    # The Gradio text box hands over one raw string, so split it on the
	    # advertised ';' separator. Lists from direct callers are used as given.
	    if isinstance(negative_prompts, str):
	        negatives = [part.strip() for part in negative_prompts.split(";") if part.strip()]
	    else:
	        negatives = list(negative_prompts or [])

	    # Without at least one negative there is nothing to contrast against
	    # (and taking the max over zero negatives would fail).
	    if not negatives:
	        raise gr.Error("Please enter at least one negative prompt.")

	    prompts = [positive_prompt] + negatives

	    inputs = processor(
	        text=prompts,
	        images=image,
	        return_tensors="pt",
	        padding=True,
	    )

	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        outputs = model(**inputs)

	    # Row 0 is the single input image; columns follow the prompt order.
	    probs = outputs.logits_per_image.softmax(dim=1)[0]
	    positive_prob = probs[0].item()
	    best_negative_prob = probs[1:].max().item()

	    # Determine if positive prompt has a higher probability than any of the negative prompts
	    is_positive_highest = positive_prob > best_negative_prob

	    return bool(is_positive_highest), f"Probability for Positive Prompt: {positive_prob:.4f}"

	# Build the web UI: one image plus two text boxes in, two text boxes out
	# (the boolean verdict and the formatted positive-prompt probability).
	interface = gr.Interface(
	    fn=image_similarity,
	    inputs=[
	        gr.components.Image(type="pil"),
	        gr.components.Textbox(label="Enter positive prompt e.g. 'a person drinking a beverage'"),
	        gr.components.Textbox(label="Enter negative prompts, separated by semicolon e.g. 'an empty scene; person without beverage'", placeholder="negative prompt 1; negative prompt 2; ..."),
	    ],
	    outputs=[
	        gr.components.Textbox(label="Result"),
	        gr.components.Textbox(label="Probability for Positive Prompt"),
	    ],
	    title="Engagify's Image Action Detection",
	    # The version tag matches the checkpoint actually loaded by this file
	    # ("openai/clip-vit-base-patch32").
	    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-32] to determine if an action is being performed in an image or not. (Binary Classifier). It contrasts an Action against multiple negative labels. Ensure the prompts accurately describe the desired detection.",
	    live=False,  # only run the model when the user submits
	    theme=gr.themes.Monochrome(),
	)

	# Start serving the app.
	interface.launch()