Spaces:

hasnanmr
/

recognition_anti-spoofing

Sleeping

App Files Files Community

recognition_anti-spoofing / app.py

hasnanmr

test new model

ea05742 6 months ago

raw

history blame

8.23 kB

	# import torch
	# import torchvision.transforms as transforms
	# import numpy as np
	# import gradio as gr
	# from PIL import Image, ImageDraw
	# from facenet_pytorch import MTCNN, InceptionResnetV1
	# import time

	# # Initialize MTCNN for face detection with smaller face size detection
	# mtcnn = MTCNN(keep_all=True, device='cuda' if torch.cuda.is_available() else 'cpu', min_face_size=20)

	# # Load the pre-trained FaceNet model
	# facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda' if torch.cuda.is_available() else 'cpu')
	# model_path = r'faceNet_update_transformation.pth'
	# model_state_dict = torch.load(model_path)
	# facenet.load_state_dict(model_state_dict)
	# facenet.eval() # Set the model to evaluation mode

	# # Define the transformation with normalization
	# val_test_transform = transforms.Compose([
	# transforms.Resize((160, 160)), # FaceNet expects 160x160 input
	# transforms.ToTensor(),
	# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
	# ])

	# def compare_faces(embedding1, embedding2, threshold=0.2): # Adjusted threshold
	# dist = np.linalg.norm(embedding1 - embedding2)
	# return dist, dist < threshold

	# def align_face(frame):
	# # Convert the frame to a PIL image if it's a numpy array
	# if isinstance(frame, np.ndarray):
	# frame = Image.fromarray(frame)
	# boxes, _ = mtcnn.detect(frame)
	# if boxes is not None and len(boxes) > 0:
	# faces = mtcnn(frame)
	# if faces is not None and len(faces) > 0:
	# face = faces[0]
	# # Convert the face tensor to PIL Image
	# face = transforms.ToPILImage()(face)
	# return face, boxes[0]
	# return None, None

	# def draw_bounding_box(image, box):
	# draw = ImageDraw.Draw(image)
	# draw.rectangle(box.tolist(), outline="red", width=3)
	# return image

	# def l2_normalize(tensor):
	# norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
	# return tensor / norm

	# def process_images(image1, image2):
	# start_time = time.time()

	# frame1 = np.array(image1)
	# frame2 = np.array(image2)

	# face1, box1 = align_face(frame1)
	# face2, box2 = align_face(frame2)

	# if face1 is None or face2 is None:
	# return None, "Face not detected in one or both images."

	# face1 = val_test_transform(face1).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
	# face2 = val_test_transform(face2).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')

	# with torch.no_grad():
	# embedding1 = facenet(face1).cpu().numpy()
	# embedding2 = facenet(face2).cpu().numpy()

	# embedding1 = l2_normalize(embedding1)
	# embedding2 = l2_normalize(embedding2)

	# distance, is_match = compare_faces(embedding1, embedding2, threshold=0.2)

	# # Calculate confidence
	# confidence = max(0.0, 1.0 - distance / 1.0) # Ensure confidence is between 0 and 1
	# print(f'confidence={confidence}')
	# end_time = time.time()
	# inference_time = end_time - start_time

	# # Draw bounding boxes on the original images
	# image1_with_box = draw_bounding_box(image1, box1)
	# image2_with_box = draw_bounding_box(image2, box2)

	# result = f"Distance: {distance:.2f}\nMatch: {is_match}\nInference time: {inference_time:.2f} seconds"

	# return [image1_with_box, image2_with_box], result

	# # Create the Gradio interface
	# iface = gr.Interface(
	# fn=process_images,
	# inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
	# outputs=[gr.Gallery(), gr.Textbox()],
	# title="Face Verification with FaceNet",
	# description="Upload two images and the model will verify if the faces in both images are of the same person."
	# )

	# # Launch the interface
	# iface.launch(share=True, debug=True)

	import torch
	import torch.nn as nn
	import numpy as np
	from PIL import Image, ImageDraw
	from torchvision import transforms
	from transformers import ViTImageProcessor, ViTModel
	from facenet_pytorch import MTCNN
	import gradio as gr
	import time

	# Vision Transformer backbone wrapped with a 512-dimensional embedding head
	class ViT(nn.Module):
	    """Embed images with a ViT backbone followed by a small projection head.

	    The backbone's pooled output goes through dropout, a linear layer that
	    maps ``base_model.config.hidden_size`` features to 512, a second
	    dropout, and finally L2 normalization along dim 1.
	    """

	    def __init__(self, base_model):
	        """Store the backbone and build the projection head.

	        Args:
	            base_model: Callable module exposing ``config.hidden_size`` and
	                returning an object with a ``pooler_output`` attribute.
	        """
	        super().__init__()
	        # Attribute names and creation order are kept as-is so that saved
	        # state dicts keep matching this module.
	        self.base_model = base_model
	        self.dropout = nn.Dropout(p=0.2)
	        self.fc = nn.Linear(base_model.config.hidden_size, 512)
	        self.dropout2 = nn.Dropout(p=0.2)
	        self.l2_norm = nn.functional.normalize

	    def forward(self, x):
	        """Return L2-normalized 512-dimensional embeddings for ``x``."""
	        pooled = self.base_model(x).pooler_output
	        projected = self.fc(self.dropout(pooled))
	        # Unit-length rows along dim 1.
	        return self.l2_norm(self.dropout2(projected), p=2, dim=1)

	# Load the pre-trained ViT backbone and processor, wrap the backbone with
	# the embedding head, and restore the fine-tuned weights.
	# The device is resolved once so the model and the face detector agree.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	model_name = "google/vit-base-patch16-224"
	processor = ViTImageProcessor.from_pretrained(model_name)
	base_model = ViTModel.from_pretrained(model_name)
	model = ViT(base_model)
	model_path = r'best_vit11.pth'
	# map_location lets a checkpoint that was saved from a GPU load on a
	# CPU-only host; without it torch.load tries to restore the tensors onto
	# the device they were saved from.
	model.load_state_dict(torch.load(model_path, map_location=device))
	model.to(device)
	model.eval()  # inference only: disables dropout

	# Initialize MTCNN for face detection
	mtcnn = MTCNN(keep_all=True, min_face_size=20, device=device)

	def align_face(frame):
	    """Detect faces in ``frame`` and return the first crop with its box.

	    Args:
	        frame: A PIL image, or a NumPy array that is converted to one.

	    Returns:
	        ``(face, box)`` where ``face`` is a PIL image built from the first
	        tensor returned by ``mtcnn(frame)`` and ``box`` is the first entry
	        of ``mtcnn.detect(frame)``; ``(None, None)`` when nothing is found.
	    """
	    image = Image.fromarray(frame) if isinstance(frame, np.ndarray) else frame

	    boxes, _ = mtcnn.detect(image)
	    if boxes is None or len(boxes) == 0:
	        return None, None

	    # NOTE(review): mtcnn(image) presumably runs detection again
	    # internally, so the detector may be doing the work twice -- confirm
	    # against the facenet-pytorch API before optimizing.
	    faces = mtcnn(image)
	    if faces is None or len(faces) == 0:
	        return None, None

	    # NOTE(review): the crop tensor may already be standardized by MTCNN;
	    # verify its value range is what ToPILImage expects.
	    to_pil = transforms.ToPILImage()
	    return to_pil(faces[0]), boxes[0]

	def draw_bounding_box(image, box):
	    """Draw a red, 3-pixel-wide rectangle for ``box`` onto ``image``.

	    Args:
	        image: Image to draw on; it is modified in place.
	        box: Object whose ``tolist()`` yields the rectangle coordinates.

	    Returns:
	        The same ``image`` object that was passed in.
	    """
	    corners = box.tolist()
	    ImageDraw.Draw(image).rectangle(corners, outline="red", width=3)
	    return image

	def euclidean_distance(embedding1, embedding2):
	    """Return the L2 norm of the difference between two embeddings."""
	    difference = embedding1 - embedding2
	    return np.linalg.norm(difference)

	def cosine_similarity(embedding1, embedding2):
	    """Return the cosine similarity of two embedding vectors.

	    Args:
	        embedding1: First vector (the caller in this file passes flattened
	            1-D NumPy arrays).
	        embedding2: Second vector, same length as ``embedding1``.

	    Returns:
	        The dot product divided by the product of the two L2 norms, or
	        ``0.0`` when either vector has zero norm. The similarity is
	        undefined in that case and the plain division would yield NaN
	        together with a runtime warning.
	    """
	    norm_product = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
	    if norm_product == 0:
	        return 0.0
	    return np.dot(embedding1, embedding2) / norm_product

	def _embed_face(face):
	    """Preprocess one face crop and return its embedding as a 1-D array."""
	    # Follow the model's own device so inputs always land where it lives.
	    device = next(model.parameters()).device
	    pixels = processor(images=face, return_tensors="pt").pixel_values.to(device)
	    with torch.no_grad():
	        return model(pixels).cpu().numpy().flatten()


	def process_images(image1, image2):
	    """Compare the first detected face in each of two images.

	    Args:
	        image1: First input image (the interface supplies PIL images), or
	            ``None`` when nothing was uploaded.
	        image2: Second input image, same conventions as ``image1``.

	    Returns:
	        ``(images, text)``. On success ``images`` is a two-element list of
	        copies of the inputs with the detected face outlined, and ``text``
	        reports the Euclidean distance between the embeddings, whether it
	        is below the 0.2 match threshold, and the elapsed time. When an
	        input is missing or a face is not found, ``images`` is ``None`` and
	        ``text`` explains why.
	    """
	    # A missing upload arrives as None; report it instead of crashing
	    # while converting it to an array.
	    if image1 is None or image2 is None:
	        return None, "Please upload both images."

	    start_time = time.time()

	    face1, box1 = align_face(np.array(image1))
	    face2, box2 = align_face(np.array(image2))
	    if face1 is None or face2 is None:
	        return None, "Face not detected in one or both images."

	    embedding1 = _embed_face(face1)
	    embedding2 = _embed_face(face2)

	    # Cosine similarity is not part of the report, so it is not computed;
	    # cosine_similarity() can be called here if it should be shown again.
	    euclidean_dist = euclidean_distance(embedding1, embedding2)
	    is_match = euclidean_dist < 0.2

	    # Confidence is only logged; it is clamped at 0 from below.
	    confidence = max(0.0, 1.0 - euclidean_dist / 1.0)
	    print(f'confidence={confidence}')
	    inference_time = time.time() - start_time

	    # Draw on copies so the caller's images are left untouched.
	    image1_with_box = draw_bounding_box(image1.copy(), box1)
	    image2_with_box = draw_bounding_box(image2.copy(), box2)

	    result = (
	        f"Euclidean Distance: {euclidean_dist:.2f}\n"
	        f"Match: {is_match}\n"
	        f"Inference time: {inference_time:.2f} seconds"
	    )
	    return [image1_with_box, image2_with_box], result

	# Create the Gradio interface: two PIL image inputs, a gallery of the
	# annotated images plus a text report as outputs.
	iface = gr.Interface(
	    fn=process_images,
	    inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
	    outputs=[gr.Gallery(), gr.Textbox()],
	    title="Face Verification with Vision Transformer",
	    description="Upload two images and the model will verify if the faces in both images are of the same person.",
	)

	# Launch only when executed as a script, so importing this module does
	# not start a web server as a side effect.
	if __name__ == "__main__":
	    iface.launch(share=True, debug=True)