hasnanmr committed
Commit d5f0cdf · Parent: 5587663

add another model state of ViT

Files changed (1):
  1. app_facevit.py +121 -0
app_facevit.py ADDED
@@ -0,0 +1,121 @@
+ import torch
+ import torch.nn as nn
+ import numpy as np
+ from PIL import Image, ImageDraw
+ from torchvision import transforms
+ from transformers import ViTImageProcessor, ViTModel
+ from facenet_pytorch import MTCNN
+ import gradio as gr
+ import time
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ # Define the Vision Transformer (ViT) embedding model
+ class ViT(nn.Module):
+     def __init__(self, base_model):
+         super(ViT, self).__init__()
+         self.base_model = base_model
+         self.dropout = nn.Dropout(p=0.2)
+         self.fc = nn.Linear(base_model.config.hidden_size, 512)
+         self.dropout2 = nn.Dropout(p=0.2)
+         self.l2_norm = nn.functional.normalize
+
+     def forward(self, x):
+         x = self.base_model(x).pooler_output
+         x = self.dropout(x)
+         x = self.fc(x)
+         x = self.dropout2(x)
+         x = self.l2_norm(x, p=2, dim=1)  # L2-normalize so embedding distances are comparable
+         return x
+
+ # Load the pre-trained ViT backbone and its processor
+ model_name = "google/vit-base-patch16-224"
+ processor = ViTImageProcessor.from_pretrained(model_name)
+ base_model = ViTModel.from_pretrained(model_name)
+ model = ViT(base_model)
+ model_path = 'best_vit11.pth'
+ # map_location keeps the checkpoint loadable on CPU-only machines
+ model.load_state_dict(torch.load(model_path, map_location=device))
+ model.to(device).eval()
+
+ # Initialize MTCNN for face detection
+ mtcnn = MTCNN(keep_all=True, min_face_size=20, device=device)
+
+ def align_face(frame):
+     # Convert the frame to a PIL image if it's a numpy array
+     if isinstance(frame, np.ndarray):
+         frame = Image.fromarray(frame)
+     boxes, _ = mtcnn.detect(frame)
+     if boxes is not None and len(boxes) > 0:
+         faces = mtcnn(frame)
+         if faces is not None and len(faces) > 0:
+             face = faces[0]
+             # Convert the face tensor back to a PIL image
+             face = transforms.ToPILImage()(face)
+             return face, boxes[0]
+     return None, None
+
+ def draw_bounding_box(image, box):
+     draw = ImageDraw.Draw(image)
+     draw.rectangle(box.tolist(), outline="red", width=3)
+     return image
+
+ def euclidean_distance(embedding1, embedding2):
+     return np.linalg.norm(embedding1 - embedding2)
+
+ def cosine_similarity(embedding1, embedding2):
+     return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
+
+ def process_images(image1, image2):
+     start_time = time.time()
+
+     frame1 = np.array(image1)
+     frame2 = np.array(image2)
+
+     face1, box1 = align_face(frame1)
+     face2, box2 = align_face(frame2)
+
+     if face1 is None or face2 is None:
+         return None, "Face not detected in one or both images."
+
+     # Preprocess the aligned faces for the ViT backbone
+     face1 = processor(images=face1, return_tensors="pt").pixel_values.to(device)
+     face2 = processor(images=face2, return_tensors="pt").pixel_values.to(device)
+
+     with torch.no_grad():
+         embedding1 = model(face1).cpu().numpy()
+         embedding2 = model(face2).cpu().numpy()
+
+     # Flatten the embeddings to 1-D vectors
+     embedding1 = embedding1.flatten()
+     embedding2 = embedding2.flatten()
+
+     euclidean_dist = euclidean_distance(embedding1, embedding2)
+     cosine_sim = cosine_similarity(embedding1, embedding2)
+     is_match = euclidean_dist < 0.2
+
+     # Map the distance to a rough confidence score in [0, 1]
+     confidence = max(0.0, 1.0 - euclidean_dist)
+     print(f'confidence={confidence}')
+     end_time = time.time()
+     inference_time = end_time - start_time
+
+     # Draw bounding boxes on the original images
+     image1_with_box = draw_bounding_box(image1, box1)
+     image2_with_box = draw_bounding_box(image2, box2)
+
+     result = f"Euclidean Distance: {euclidean_dist:.2f}\n"
+     # result += f"Cosine Similarity: {cosine_sim:.2f}\n"
+     result += f"Match: {is_match}\n"
+     result += f"Inference time: {inference_time:.2f} seconds"
+
+     return [image1_with_box, image2_with_box], result
+
+ # Create the Gradio interface
+ iface = gr.Interface(
+     fn=process_images,
+     inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
+     outputs=[gr.Gallery(), gr.Textbox()],
+     title="Face Verification with Vision Transformer",
+     description="Upload two images and the model will verify if the faces in both images are of the same person.",
+ )
+
+ # Launch the interface (guarded so the module can be imported without starting the app)
+ if __name__ == "__main__":
+     iface.launch(share=True, debug=True)
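
For a quick sanity check outside the Gradio UI, a minimal sketch that calls process_images directly is shown below. It assumes the script is importable as app_facevit (which the __main__ guard above permits); person_a.jpg and person_b.jpg are hypothetical placeholder paths, so swap in real face photos to try it.

# Hypothetical smoke test; 'person_a.jpg' and 'person_b.jpg' are placeholder paths.
from PIL import Image

from app_facevit import process_images  # assumes app_facevit.py is on the import path

img1 = Image.open("person_a.jpg").convert("RGB")
img2 = Image.open("person_b.jpg").convert("RGB")

images, report = process_images(img1, img2)
print(report)  # distance, match verdict, and timing, or a "face not detected" message

Note that the 0.2 match threshold assumes L2-normalized embeddings, where pairwise distances fall in [0, 2]; a different checkpoint would likely need the cutoff retuned.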