import torch
import torch.nn as nn
import numpy as np
from PIL import Image, ImageDraw
from torchvision import transforms
from transformers import ViTImageProcessor, ViTModel
from facenet_pytorch import MTCNN
import gradio as gr
import time
# Define the embedding head on top of the pre-trained Vision Transformer (ViT) backbone
class ViT(nn.Module):
    def __init__(self, base_model):
        super(ViT, self).__init__()
        self.base_model = base_model
        self.dropout = nn.Dropout(p=0.2)
        self.fc = nn.Linear(base_model.config.hidden_size, 512)
        self.dropout2 = nn.Dropout(p=0.2)
        self.l2_norm = nn.functional.normalize

    def forward(self, x):
        x = self.base_model(x).pooler_output
        x = self.dropout(x)
        x = self.fc(x)  # project to a 512-d face embedding
        x = self.dropout2(x)
        x = self.l2_norm(x, p=2, dim=1)  # L2-normalize so embeddings lie on the unit sphere
        return x
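
# Minimal sanity-check sketch (not executed as part of the app): because
# forward() ends with L2 normalization, each embedding should have unit norm
# and shape (batch, 512). The dummy input shape assumes the 224x224 patch-16
# backbone loaded below.
#
#   _check = ViT(ViTModel.from_pretrained("google/vit-base-patch16-224")).eval()
#   with torch.no_grad():
#       _emb = _check(torch.randn(1, 3, 224, 224))
#   print(_emb.shape, _emb.norm(dim=1))  # torch.Size([1, 512]), ~1.0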
# Load the pre-trained ViT backbone and its image processor
model_name = "google/vit-base-patch16-224"
processor = ViTImageProcessor.from_pretrained(model_name)
base_model = ViTModel.from_pretrained(model_name)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = ViT(base_model)
model_path = r'best_vit11.pth'
# map_location keeps the checkpoint loadable on CPU-only machines
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval().to(device)
# Initialize MTCNN for face detection
mtcnn = MTCNN(keep_all=True, min_face_size=12, device=device)
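
# Note: mtcnn.detect returns (boxes, probs), where boxes is an (n, 4) array of
# [x1, y1, x2, y2] coordinates, or None when no face is found, e.g.:
#
#   boxes, probs = mtcnn.detect(Image.open("photo.jpg"))  # hypothetical path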
def align_face(frame):
    # Convert the frame to a PIL image if it's a numpy array
    if isinstance(frame, np.ndarray):
        frame = Image.fromarray(frame)
    boxes, _ = mtcnn.detect(frame)
    if boxes is not None and len(boxes) > 0:
        # Extract the aligned face crops (MTCNN returns post-processed,
        # normalized tensors by default)
        faces = mtcnn(frame)
        if faces is not None and len(faces) > 0:
            face = faces[0]  # keep the first detected face
            # Convert the face tensor back to a PIL image
            face = transforms.ToPILImage()(face)
            return face, boxes[0]
    return None, None
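
# Usage sketch (hypothetical file path, for illustration only):
#
#   frame = np.array(Image.open("photo.jpg").convert("RGB"))
#   face, box = align_face(frame)  # face is a PIL crop, box is [x1, y1, x2, y2]
#   if face is None:
#       print("no face found")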
def draw_bounding_box(image, box):
    # Draw the detection box in place on the given PIL image
    draw = ImageDraw.Draw(image)
    draw.rectangle(box.tolist(), outline="red", width=3)
    return image

def euclidean_distance(embedding1, embedding2):
    return np.linalg.norm(embedding1 - embedding2)
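
# Because the embeddings are L2-normalized, Euclidean distance and cosine
# similarity are interchangeable: d^2 = 2 * (1 - cos_sim), so d lies in [0, 2].
# A small helper (not used by the app) makes the relation explicit:
def cosine_similarity_from_distance(dist):
    # Recover cosine similarity from the Euclidean distance between two
    # unit-norm embeddings.
    return 1.0 - (dist ** 2) / 2.0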
def process_images(image1, image2):
    start_time = time.time()
    frame1 = np.array(image1)
    frame2 = np.array(image2)

    face1, box1 = align_face(frame1)
    face2, box2 = align_face(frame2)
    if face1 is None or face2 is None:
        return None, "Face not detected in one or both images."

    # Use the ViT processor to resize and normalize the aligned crops
    face1 = processor(images=face1, return_tensors="pt").pixel_values.to(device)
    face2 = processor(images=face2, return_tensors="pt").pixel_values.to(device)

    with torch.no_grad():
        embedding1 = model(face1).cpu().numpy()
        embedding2 = model(face2).cpu().numpy()

    # Flatten the embeddings to 1-D vectors before measuring distance
    embedding1 = embedding1.flatten()
    embedding2 = embedding2.flatten()

    euclidean_dist = euclidean_distance(embedding1, embedding2)
    is_match = euclidean_dist < 0.3  # empirically chosen verification threshold

    # Heuristic confidence: linearly map distance down to a [0, 1] score
    confidence = max(0.0, 1.0 - euclidean_dist)
    print(f'confidence={confidence}')  # debug output

    end_time = time.time()
    inference_time = end_time - start_time

    # Draw bounding boxes on the original images
    image1_with_box = draw_bounding_box(image1, box1)
    image2_with_box = draw_bounding_box(image2, box2)

    result = f"Euclidean Distance: {euclidean_dist:.2f}\n"
    result += f"Match: {is_match}\n"
    result += f"Inference time: {inference_time:.2f} seconds"

    return [image1_with_box, image2_with_box], result
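
# Standalone usage sketch (hypothetical file paths, bypassing the Gradio UI):
#
#   img_a = Image.open("person_a.jpg").convert("RGB")
#   img_b = Image.open("person_b.jpg").convert("RGB")
#   images, summary = process_images(img_a, img_b)
#   print(summary)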
# Create the Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
    outputs=[gr.Gallery(), gr.Textbox()],
    title="Face Verification with Vision Transformer",
    description="Upload two images and the model will verify whether the faces in both images belong to the same person.",
)

# Launch the interface
iface.launch(share=True, debug=True)