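"""Face verification demo.

Detects faces with MTCNN, embeds them with a fine-tuned FaceNet
(InceptionResnetV1), and compares the L2-normalized embeddings by
Euclidean distance behind a Gradio interface.
"""
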
import torch
import torchvision.transforms as transforms
import numpy as np
import gradio as gr
from PIL import Image, ImageDraw
from facenet_pytorch import MTCNN, InceptionResnetV1
import time

# Run on GPU when available; reuse this device string everywhere below
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize MTCNN for face detection; min_face_size=12 lets it find smaller faces
mtcnn = MTCNN(keep_all=True, device=device, min_face_size=12)

# Load the pre-trained FaceNet model, then overwrite its weights with the fine-tuned checkpoint
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
model_path = r'faceNet_update_transformation.pth'
model_state_dict = torch.load(model_path, map_location=device)  # map_location lets CPU-only machines load a GPU-saved checkpoint
facenet.load_state_dict(model_state_dict)
facenet.eval()  # Set the model to evaluation mode

# Define the transformation with normalization
val_test_transform = transforms.Compose([
    transforms.Resize((160, 160)),  # FaceNet expects 160x160 input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
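# Note: these ImageNet statistics are assumed to match whatever normalization
# the fine-tuned checkpoint above was trained with; the stock facenet_pytorch
# weights expect fixed_image_standardization ((x - 127.5) / 128.0) instead.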

def compare_faces(embedding1, embedding2, threshold=0.2):  # Default threshold; overridden to 0.1 at the call site below
    dist = np.linalg.norm(embedding1 - embedding2)
    return dist, dist < threshold
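# Since both embeddings are L2-normalized before comparison (see process_images),
# dist**2 = 2 - 2 * cosine_similarity, so this distance threshold is equivalent
# to a cosine-similarity threshold.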

def align_face(frame):
    # Convert the frame to a PIL image if it's a numpy array
    if isinstance(frame, np.ndarray):
        frame = Image.fromarray(frame)
    boxes, _ = mtcnn.detect(frame)
    if boxes is not None and len(boxes) > 0:
        faces = mtcnn(frame)
        if faces is not None and len(faces) > 0:
            face = faces[0]
            # Convert the face tensor to PIL Image
            face = transforms.ToPILImage()(face)
            return face, boxes[0]
    return None, None
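# Caveat: MTCNN post-processes face crops with fixed_image_standardization by
# default, so the tensor handed to ToPILImage above is not guaranteed to lie in
# [0, 1]. If your checkpoint expects visually faithful crops, construct MTCNN
# with post_process=False and rescale the crop to [0, 1] before conversion.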

def draw_bounding_box(image, box):
    # Draws directly on the PIL image passed in (in place) and returns it
    draw = ImageDraw.Draw(image)
    draw.rectangle(box.tolist(), outline="red", width=3)
    return image

def l2_normalize(tensor):
    # Scale each embedding (row) to unit length so distances are comparable
    norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
    return tensor / norm

def process_images(image1, image2):
    start_time = time.time()
    
    frame1 = np.array(image1)
    frame2 = np.array(image2)
    
    face1, box1 = align_face(frame1)
    face2, box2 = align_face(frame2)
    
    if face1 is None or face2 is None:
        return None, "Face not detected in one or both images."
    
    face1 = val_test_transform(face1).unsqueeze(0).to(device)
    face2 = val_test_transform(face2).unsqueeze(0).to(device)
    
    with torch.no_grad():
        embedding1 = facenet(face1).cpu().numpy()
        embedding2 = facenet(face2).cpu().numpy()
    
    embedding1 = l2_normalize(embedding1)
    embedding2 = l2_normalize(embedding2)
    
    distance, is_match = compare_faces(embedding1, embedding2, threshold=0.1)
    
    # Map the distance to a rough confidence score in [0, 1]; with unit-length
    # embeddings the distance lies in [0, 2], and max() clamps negatives to 0
    confidence = max(0.0, 1.0 - distance)
    print(f'confidence={confidence}')
    end_time = time.time()
    inference_time = end_time - start_time
    
    # Draw bounding boxes on the original images
    image1_with_box = draw_bounding_box(image1, box1)
    image2_with_box = draw_bounding_box(image2, box2)
    
    result = f"Distance: {distance:.2f}\nMatch: {is_match}\nConfidence: {confidence:.2f}\nInference time: {inference_time:.2f} seconds"
    
    return [image1_with_box, image2_with_box], result

# Create the Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
    outputs=[gr.Gallery(), gr.Textbox()],
    title="Face Verification with FaceNet",
    description="Upload two images; the model verifies whether the faces in both images belong to the same person."
)

# Launch the interface; share=True also creates a temporary public URL
iface.launch(share=True, debug=True)