import torch
from torch import nn
import torchvision.transforms as transforms
import cv2
import numpy as np
import gradio as gr
from PIL import Image
from facenet_pytorch import MTCNN
from transformers import ViTImageProcessor, ViTModel
import time

# Define the ViT wrapper: returns the pooled embedding from the base model
class ViT(nn.Module):
    def __init__(self, base_model):
        super(ViT, self).__init__()
        self.base_model = base_model

    def forward(self, x):
        # pooler_output is the pooled [CLS] representation
        x = self.base_model(x).pooler_output
        return x

# Select the device first so the checkpoint can be mapped onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the processor and base model
# Note: the processor is loaded but unused below (a manual torchvision
# transform is applied instead), and the backbone checkpoint (vit-small)
# differs from model_name (vit-base).
model_name = "google/vit-base-patch16-224"
processor = ViTImageProcessor.from_pretrained(model_name)
base_model = ViTModel.from_pretrained("WinKawaks/vit-small-patch16-224")
model = ViT(base_model)
# map_location lets a GPU-trained checkpoint load on CPU-only machines
model.load_state_dict(torch.load('faceViT4.pth', map_location=device))

# Set the model to evaluation mode and move it to the device
model.eval()
model.to(device)

# Initialize MTCNN for face detection
mtcnn = MTCNN(keep_all=True, device=device)

# Define the preprocessing transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])
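# Note: this transform omits the mean/std normalization that ViTImageProcessor
# would apply; it assumes the faceViT4.pth checkpoint was fine-tuned on
# [0, 1]-range inputs.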

def compare_faces(embedding1, embedding2, threshold=1.0):
    dist = np.linalg.norm(embedding1.cpu().numpy() - embedding2.cpu().numpy())
    return dist, dist < threshold
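
# Note: threshold=1.0 assumes unit-norm embeddings (see l2_normalize below):
# for unit vectors, ||a - b||^2 = 2 - 2*cos(a, b), so dist < 1.0 is equivalent
# to cosine similarity > 0.5. Adjust the threshold to trade precision for recall.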

def align_face(frame):
    # Convert the frame to a PIL image if it's a numpy array
    if isinstance(frame, np.ndarray):
        frame = Image.fromarray(frame)
    boxes, _ = mtcnn.detect(frame)
    if boxes is not None and len(boxes) > 0:
        faces = mtcnn(frame)
        if faces is not None and len(faces) > 0:
            face = faces[0]
            # Convert the face tensor to a PIL image
            face = transforms.ToPILImage()(face)
            return face
    return None
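
# Note: with keep_all=True MTCNN returns every detected face, but align_face
# keeps only faces[0], so verification always uses the first detection.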

def l2_normalize(tensor):
    # Scale each embedding to unit length; clamp avoids division by zero
    norm = torch.norm(tensor, p=2, dim=1, keepdim=True).clamp(min=1e-12)
    return tensor / norm

def process_images(image1, image2):
    start_time = time.time()
    frame1 = np.array(image1)
    frame2 = np.array(image2)

    face1 = align_face(frame1)
    face2 = align_face(frame2)
    if face1 is None or face2 is None:
        return None, "Face not detected in one or both images."

    face1 = transform(face1).unsqueeze(0).to(device)
    face2 = transform(face2).unsqueeze(0).to(device)

    with torch.no_grad():
        embedding1 = model(face1)
        embedding2 = model(face2)

    embedding1 = l2_normalize(embedding1)
    embedding2 = l2_normalize(embedding2)
    distance, is_match = compare_faces(embedding1, embedding2)

    end_time = time.time()
    inference_time = end_time - start_time
    result = f"Distance: {distance:.2f}\nMatch: {is_match}\nInference time: {inference_time:.2f} seconds"
    # Return a list so gr.Gallery treats the pair as two separate images
    return [image1, image2], result

# Create the Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
    outputs=[gr.Gallery(), gr.Textbox()],
    title="Face Verification with MTCNN and ViT",
    description="Upload two images and the model will verify if the faces in both images are of the same person."
)

# Launch the interface
iface.launch(share=True, debug=True)
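
# share=True requests a temporary public gradio.live URL; drop it (and
# debug=True) for local-only use, e.g. iface.launch().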