import gradio as gr
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Initialize the CLIP model and processor once at startup
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
def image_similarity(image: Image.Image, action_prompt: str) -> tuple[bool, str]:
    """Check whether the positive action prompt fits the image better than its negation."""
    positive_text = f"a picture of someone {action_prompt}"
    negative_text = f"not a picture of someone {action_prompt}"
    inputs = processor(
        text=[positive_text, negative_text],
        images=image,
        return_tensors="pt",
        padding=True,
    )
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    probs = logits_per_image.softmax(dim=1)  # softmax over the two labels
    pos_prob, neg_prob = probs[0][0].item(), probs[0][1].item()
    # The action is detected when the positive prompt outscores the negative one
    return pos_prob > neg_prob, f"Probabilities: Positive {pos_prob:.4f}, Negative {neg_prob:.4f}"
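
# A minimal local sanity check of the classifier, kept as a comment so the Space
# does not execute it (the file name is hypothetical; assumes a test image on disk):
#
#   img = Image.open("example.jpg")
#   detected, probabilities = image_similarity(img, "smiling")
#   print(detected, probabilities)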
interface = gr.Interface(
    fn=image_similarity,
    inputs=[
        gr.components.Image(type="pil"),
        gr.components.Text(label="Enter an action prompt, e.g. 'smiling'"),
    ],
    outputs=[
        gr.components.Textbox(label="Result"),
        gr.components.Textbox(label="Probabilities"),
    ],
    title="Engagify's Image Action Detection",
    description="[Author: Ibrahim Hasani] This method uses CLIP-ViT [Version: BASE-PATCH-16] as a binary classifier to determine whether an action is being performed in an image. It contrasts the action prompt against a negative label that should sit far enough from the target label in the latent semantic space. Enter only the action to be performed; do not phrase the prompt itself as a negative.",
    live=False,
    theme=gr.themes.Monochrome(),
)

interface.launch()