import os
import tempfile
from io import BytesIO

import cv2
import gradio as gr
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image, ImageDraw
from transformers import OwlViTProcessor, OwlViTForObjectDetection

from SuperGluePretrainedNetwork.models.matching import Matching
from SuperGluePretrainedNetwork.models.utils import read_image
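
# Pipeline: OWL-ViT proposes candidate regions for the query object in the
# target image; each crop is then verified with SuperPoint + SuperGlue keypoint
# matching, and the object counts as present once enough RANSAC inliers survive.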
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load models
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32").to(device)
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
matching = Matching({
    'superpoint': {'nms_radius': 4, 'keypoint_threshold': 0.005, 'max_keypoints': 1024},
    'superglue': {'weights': 'outdoor', 'sinkhorn_iterations': 20, 'match_threshold': 0.2},
}).eval().to(device)
# Utility functions
def save_array_to_temp_image(arr):
    """Save a BGR numpy array to a temporary PNG file and return its path."""
    rgb_arr = cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(rgb_arr)
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
    temp_file_name = temp_file.name
    temp_file.close()
    img.save(temp_file_name)
    return temp_file_name
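
# `stitch_images` is called in check_object_in_image but was missing from this
# file; a minimal sketch is assumed here: paste the visualizations into one
# vertical strip. Adjust if the original helper laid them out differently.
def stitch_images(images):
    """Stack a list of PIL images vertically; return None for an empty list."""
    if not images:
        return None
    width = max(img.width for img in images)
    height = sum(img.height for img in images)
    canvas = Image.new('RGB', (width, height), (255, 255, 255))
    y = 0
    for img in images:
        canvas.paste(img, (0, y))
        y += img.height
    return canvas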
def unified_matching_plot2(image0, image1, kpts0, kpts1, mkpts0, mkpts1, color,
                           text, path=None, show_keypoints=False, fast_viz=False,
                           opencv_display=False, opencv_title='matches', small_text=None):
    """Plot both images side by side with match lines and return an RGB array."""
    # Resize both images to a common height and rescale keypoints to match.
    height = min(image0.shape[0], image1.shape[0])
    s0 = height / image0.shape[0]
    s1 = height / image1.shape[0]
    image0_resized = cv2.resize(image0, (int(image0.shape[1] * s0), height))
    image1_resized = cv2.resize(image1, (int(image1.shape[1] * s1), height))
    offset = image0_resized.shape[1]

    fig, ax = plt.subplots(figsize=(20, 20))
    ax.imshow(np.hstack([image0_resized, image1_resized]), aspect='auto', cmap='gray')
    # One red line per match; points in image1 are shifted right by image0's width.
    ax.plot([mkpts0[:, 0] * s0, mkpts1[:, 0] * s1 + offset],
            [mkpts0[:, 1] * s0, mkpts1[:, 1] * s1], 'r', lw=0.5)
    ax.scatter(mkpts0[:, 0] * s0, mkpts0[:, 1] * s0, s=2, marker='o', color='b')
    ax.scatter(mkpts1[:, 0] * s1 + offset, mkpts1[:, 1] * s1, s=2, marker='o', color='g')
    if show_keypoints:
        ax.scatter(kpts0[:, 0] * s0, kpts0[:, 1] * s0, color='r', s=1)
        ax.scatter(kpts1[:, 0] * s1 + offset, kpts1[:, 1] * s1, color='r', s=1)
    ax.axis('off')
    fig.suptitle('\n'.join(text), fontsize=20, fontweight='bold')
    fig.tight_layout()

    # Rasterize the figure to an RGB numpy array instead of displaying it.
    buf = BytesIO()
    fig.savefig(buf, format='png')
    plt.close(fig)
    buf.seek(0)
    img_arr = np.frombuffer(buf.getvalue(), dtype=np.uint8)
    buf.close()
    img = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Main functions
def detect_and_crop(target_image, query_image, threshold=0.5, nms_threshold=0.3):
    """Run OWL-ViT image-guided detection; return BGR crops and an annotated image."""
    target_sizes = torch.Tensor([target_image.size[::-1]])
    inputs = processor(images=target_image, query_images=query_image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.image_guided_detection(**inputs)
    # PIL arrays are RGB; swap to BGR so the crops match what the OpenCV-based
    # helpers downstream expect.
    img = cv2.cvtColor(np.array(target_image), cv2.COLOR_RGB2BGR)
    outputs.logits = outputs.logits.cpu()
    outputs.target_pred_boxes = outputs.target_pred_boxes.cpu()
    results = processor.post_process_image_guided_detection(
        outputs=outputs, threshold=threshold, nms_threshold=nms_threshold, target_sizes=target_sizes)
    boxes, scores = results[0]["boxes"], results[0]["scores"]
    if len(boxes) == 0:
        return [], None
    cropped_images = []
    for box in boxes:
        x1, y1, x2, y2 = [int(i) for i in box.tolist()]
        cropped_img = img[y1:y2, x1:x2]
        if cropped_img.size != 0:
            cropped_images.append(cropped_img)
    # Draw the detections on the input image for display.
    draw = ImageDraw.Draw(target_image)
    for box in boxes:
        draw.rectangle(box.tolist(), outline="red", width=3)
    return cropped_images, target_image
def image_matching_no_pyramid(query_img, target_img, visualize=True):
    """Match SuperPoint keypoints between query and target with SuperGlue."""
    # read_image expects file paths, so round-trip both PIL images through
    # temporary files (converted to BGR for the OpenCV-based helper).
    temp_query = save_array_to_temp_image(cv2.cvtColor(np.array(query_img), cv2.COLOR_RGB2BGR))
    temp_target = save_array_to_temp_image(cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR))
    image0, inp0, scales0 = read_image(temp_query, device, [640 * 2], 0, True)
    image1, inp1, scales1 = read_image(temp_target, device, [640 * 2], 0, True)
    os.remove(temp_query)
    os.remove(temp_target)
    if image0 is None or image1 is None:
        return None
    with torch.no_grad():
        pred = matching({'image0': inp0, 'image1': inp1})
    pred = {k: v[0] for k, v in pred.items()}
    kpts0, kpts1 = pred['keypoints0'], pred['keypoints1']
    matches, conf = pred['matches0'], pred['matching_scores0']
    valid = matches > -1
    mkpts0 = kpts0[valid]
    mkpts1 = kpts1[matches[valid]]
    mconf = conf[valid]
    color = cm.jet(mconf.detach().cpu().numpy())
    valid_count = int(valid.sum())
    mkpts0_np = mkpts0.detach().cpu().numpy()
    mkpts1_np = mkpts1.detach().cpu().numpy()
    # Estimate a homography with RANSAC; the surviving inliers are the
    # geometric-consistency score used to decide whether the object matches.
    num_inliers = 0
    if len(mkpts0_np) >= 4:
        _, inliers = cv2.findHomography(mkpts0_np, mkpts1_np, cv2.RANSAC, 5.0)
        if inliers is not None:
            num_inliers = int(np.sum(inliers))
    visualized_img = None
    if visualize:
        visualized_img = unified_matching_plot2(
            image0, image1,
            kpts0.detach().cpu().numpy(), kpts1.detach().cpu().numpy(),
            mkpts0_np, mkpts1_np, color, ['Matches'], show_keypoints=True)
    return {
        'valid': [valid_count],
        'inliers': [num_inliers],
        'visualized_image': [visualized_img],
    }
def check_object_in_image(query_image, target_image, threshold=50, scale_factor=(0.33, 0.66, 1.0)):
    """Detect candidate regions with OWL-ViT, then verify each crop with SuperGlue.

    `threshold` is the minimum number of RANSAC inliers for a positive match;
    `scale_factor` is accepted for interface compatibility but unused in this
    no-pyramid version.
    """
    images_to_return = []
    cropped_images, bbox_image = detect_and_crop(target_image, query_image)
    temp_files = [save_array_to_temp_image(i) for i in cropped_images]
    crop_results = [image_matching_no_pyramid(query_image, Image.open(f), visualize=True)
                    for f in temp_files]
    for f in temp_files:
        os.remove(f)
    cropped_visuals = []
    cropped_inliers = []
    for result in crop_results:
        if result:
            for img in result['visualized_image']:
                if img is not None:
                    cropped_visuals.append(Image.fromarray(img))
            for inliers_ in result['inliers']:
                cropped_inliers.append(inliers_)
    stitched = stitch_images(cropped_visuals)
    if stitched is not None:
        images_to_return.append(stitched)
    is_present = any(value >= threshold for value in cropped_inliers)
    return {
        'is_present': is_present,
        'images': images_to_return,
        'object detection inliers': [int(i) for i in cropped_inliers],
        'bbox_image': bbox_image,
    }
def interface(poster_source, media_source, threshold, scale_factor):
    result = check_object_in_image(poster_source, media_source, threshold, scale_factor)
    # PIL images are not JSON-serializable, so return them through dedicated
    # image components and keep only plain values in the JSON summary.
    summary = {
        'is_present': result['is_present'],
        'object detection inliers': result['object detection inliers'],
    }
    return summary, result['images'], result['bbox_image']
iface = gr.Interface(
    fn=interface,
    inputs=[
        gr.Image(type="pil", label="Upload a Query Image (Poster)"),
        gr.Image(type="pil", label="Upload a Target Image (Media)"),
        gr.Slider(minimum=0, maximum=100, step=1, value=50, label="Inlier Threshold"),
        gr.CheckboxGroup(choices=[0.33, 0.66, 1.0], value=[0.33, 0.66, 1.0], label="Scale Factors")
    ],
    outputs=[
        gr.JSON(label="Result"),
        gr.Gallery(label="Match Visualizations"),
        gr.Image(label="Detected Boxes")
    ],
    title="Object Detection in Image",
    description="""
    **Instructions:**
    1. **Upload a Query Image (Poster)**: Select an image containing the object you want to find.
    2. **Upload a Target Image (Media)**: Select the image in which to look for the object.
    3. **Set Inlier Threshold**: The minimum number of geometric (RANSAC) inliers required to count the object as present.
    4. **Set Scale Factors**: Scale factors for the image pyramid (kept for compatibility; unused in this no-pyramid version).
    5. **View Results**: The JSON summary reports whether the object is present, alongside the match visualizations and detected boxes.
    """
)
if __name__ == "__main__":
    iface.launch()