import time

import numpy as np
import torch
import torchvision.transforms as transforms
import gradio as gr
from PIL import Image, ImageDraw
from facenet_pytorch import MTCNN, InceptionResnetV1

# Use the GPU when available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize MTCNN for face detection with a small minimum face size
# so that smaller faces are still detected
mtcnn = MTCNN(keep_all=True, device=device, min_face_size=20)

# Load the pre-trained FaceNet model and the fine-tuned weights
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
model_path = r'faceNet_update_transformation.pth'
model_state_dict = torch.load(model_path, map_location=device)
facenet.load_state_dict(model_state_dict)
facenet.eval()  # Set the model to evaluation mode

# Define the transformation with normalization
val_test_transform = transforms.Compose([
    transforms.Resize((160, 160)),  # FaceNet expects 160x160 input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


def compare_faces(embedding1, embedding2, threshold=0.2):
    """Return the L2 distance between two embeddings and whether it is below the threshold."""
    dist = np.linalg.norm(embedding1 - embedding2)
    return dist, dist < threshold


def align_face(frame):
    """Detect the first face in a frame and return it as a PIL image along with its bounding box."""
    # Convert the frame to a PIL image if it's a numpy array
    if isinstance(frame, np.ndarray):
        frame = Image.fromarray(frame)

    boxes, _ = mtcnn.detect(frame)
    if boxes is not None and len(boxes) > 0:
        faces = mtcnn(frame)
        if faces is not None and len(faces) > 0:
            face = faces[0]
            # Convert the face tensor back to a PIL image
            face = transforms.ToPILImage()(face)
            return face, boxes[0]
    return None, None


def draw_bounding_box(image, box):
    """Draw a red rectangle around the detected face."""
    draw = ImageDraw.Draw(image)
    draw.rectangle(box.tolist(), outline="red", width=3)
    return image


def l2_normalize(tensor):
    """L2-normalize each embedding row so distances are comparable."""
    norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
    return tensor / norm


def process_images(image1, image2):
    """Verify whether the faces in the two uploaded images belong to the same person."""
    start_time = time.time()

    frame1 = np.array(image1)
    frame2 = np.array(image2)

    face1, box1 = align_face(frame1)
    face2, box2 = align_face(frame2)

    if face1 is None or face2 is None:
        return None, "Face not detected in one or both images."

    face1 = val_test_transform(face1).unsqueeze(0).to(device)
    face2 = val_test_transform(face2).unsqueeze(0).to(device)

    with torch.no_grad():
        embedding1 = facenet(face1).cpu().numpy()
        embedding2 = facenet(face2).cpu().numpy()

    embedding1 = l2_normalize(embedding1)
    embedding2 = l2_normalize(embedding2)

    distance, is_match = compare_faces(embedding1, embedding2, threshold=0.2)

    # Map the distance to a rough confidence score clamped to [0, 1]
    confidence = max(0.0, 1.0 - distance / 1.0)
    print(f'confidence={confidence}')

    end_time = time.time()
    inference_time = end_time - start_time

    # Draw bounding boxes on the original images
    image1_with_box = draw_bounding_box(image1, box1)
    image2_with_box = draw_bounding_box(image2, box2)

    result = f"Distance: {distance:.2f}\nMatch: {is_match}\nInference time: {inference_time:.2f} seconds"
    return [image1_with_box, image2_with_box], result


# Create the Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
    outputs=[gr.Gallery(), gr.Textbox()],
    title="Face Verification with FaceNet",
    description="Upload two images and the model will verify if the faces in both images are of the same person."
)

# Launch the interface
iface.launch(share=True, debug=True)