hasnanmr committed
Commit: ea05742 · Parent: 6376131

test new model

Files changed (2):
  1. app.py +155 -33
  2. best_vit11.pth +3 -0
app.py CHANGED
@@ -1,31 +1,144 @@
+# import torch
+# import torchvision.transforms as transforms
+# import numpy as np
+# import gradio as gr
+# from PIL import Image, ImageDraw
+# from facenet_pytorch import MTCNN, InceptionResnetV1
+# import time
+
+# # Initialize MTCNN for face detection with smaller face size detection
+# mtcnn = MTCNN(keep_all=True, device='cuda' if torch.cuda.is_available() else 'cpu', min_face_size=20)
+
+# # Load the pre-trained FaceNet model
+# facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda' if torch.cuda.is_available() else 'cpu')
+# model_path = r'faceNet_update_transformation.pth'
+# model_state_dict = torch.load(model_path)
+# facenet.load_state_dict(model_state_dict)
+# facenet.eval()  # Set the model to evaluation mode
+
+# # Define the transformation with normalization
+# val_test_transform = transforms.Compose([
+#     transforms.Resize((160, 160)),  # FaceNet expects 160x160 input
+#     transforms.ToTensor(),
+#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+# ])
+
+# def compare_faces(embedding1, embedding2, threshold=0.2):  # Adjusted threshold
+#     dist = np.linalg.norm(embedding1 - embedding2)
+#     return dist, dist < threshold
+
+# def align_face(frame):
+#     # Convert the frame to a PIL image if it's a numpy array
+#     if isinstance(frame, np.ndarray):
+#         frame = Image.fromarray(frame)
+#     boxes, _ = mtcnn.detect(frame)
+#     if boxes is not None and len(boxes) > 0:
+#         faces = mtcnn(frame)
+#         if faces is not None and len(faces) > 0:
+#             face = faces[0]
+#             # Convert the face tensor to PIL Image
+#             face = transforms.ToPILImage()(face)
+#             return face, boxes[0]
+#     return None, None
+
+# def draw_bounding_box(image, box):
+#     draw = ImageDraw.Draw(image)
+#     draw.rectangle(box.tolist(), outline="red", width=3)
+#     return image
+
+# def l2_normalize(tensor):
+#     norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
+#     return tensor / norm
+
+# def process_images(image1, image2):
+#     start_time = time.time()
+
+#     frame1 = np.array(image1)
+#     frame2 = np.array(image2)
+
+#     face1, box1 = align_face(frame1)
+#     face2, box2 = align_face(frame2)
+
+#     if face1 is None or face2 is None:
+#         return None, "Face not detected in one or both images."
+
+#     face1 = val_test_transform(face1).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
+#     face2 = val_test_transform(face2).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
+
+#     with torch.no_grad():
+#         embedding1 = facenet(face1).cpu().numpy()
+#         embedding2 = facenet(face2).cpu().numpy()
+
+#     embedding1 = l2_normalize(embedding1)
+#     embedding2 = l2_normalize(embedding2)
+
+#     distance, is_match = compare_faces(embedding1, embedding2, threshold=0.2)
+
+#     # Calculate confidence
+#     confidence = max(0.0, 1.0 - distance / 1.0)  # Ensure confidence is between 0 and 1
+#     print(f'confidence={confidence}')
+#     end_time = time.time()
+#     inference_time = end_time - start_time
+
+#     # Draw bounding boxes on the original images
+#     image1_with_box = draw_bounding_box(image1, box1)
+#     image2_with_box = draw_bounding_box(image2, box2)
+
+#     result = f"Distance: {distance:.2f}\nMatch: {is_match}\nInference time: {inference_time:.2f} seconds"
+
+#     return [image1_with_box, image2_with_box], result
+
+# # Create the Gradio interface
+# iface = gr.Interface(
+#     fn=process_images,
+#     inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
+#     outputs=[gr.Gallery(), gr.Textbox()],
+#     title="Face Verification with FaceNet",
+#     description="Upload two images and the model will verify if the faces in both images are of the same person."
+# )
+
+# # Launch the interface
+# iface.launch(share=True, debug=True)
+
 import torch
-import torchvision.transforms as transforms
+import torch.nn as nn
 import numpy as np
-import gradio as gr
 from PIL import Image, ImageDraw
-from facenet_pytorch import MTCNN, InceptionResnetV1
+from torchvision import transforms
+from transformers import ViTImageProcessor, ViTModel
+from facenet_pytorch import MTCNN
+import gradio as gr
 import time
 
-# Initialize MTCNN for face detection with smaller face size detection
-mtcnn = MTCNN(keep_all=True, device='cuda' if torch.cuda.is_available() else 'cpu', min_face_size=12)
+# Define the Vision Transformer (ViT) architecture
+class ViT(nn.Module):
+    def __init__(self, base_model):
+        super(ViT, self).__init__()
+        self.base_model = base_model
+        self.dropout = nn.Dropout(p=0.2)
+        self.fc = nn.Linear(base_model.config.hidden_size, 512)
+        self.dropout2 = nn.Dropout(p=0.2)
+        self.l2_norm = nn.functional.normalize
 
-# Load the pre-trained FaceNet model
-facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda' if torch.cuda.is_available() else 'cpu')
-model_path = r'faceNet_update_transformation.pth'
-model_state_dict = torch.load(model_path)
-facenet.load_state_dict(model_state_dict)
-facenet.eval()  # Set the model to evaluation mode
+    def forward(self, x):
+        x = self.base_model(x).pooler_output
+        x = self.dropout(x)
+        x = self.fc(x)
+        x = self.dropout2(x)
+        x = self.l2_norm(x, p=2, dim=1)  # Apply L2 normalization
+        return x
 
-# Define the transformation with normalization
-val_test_transform = transforms.Compose([
-    transforms.Resize((160, 160)),  # FaceNet expects 160x160 input
-    transforms.ToTensor(),
-    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-])
+# Load the pre-trained ViT model and processor
+model_name = "google/vit-base-patch16-224"
+processor = ViTImageProcessor.from_pretrained(model_name)
+base_model = ViTModel.from_pretrained(model_name)
+model = ViT(base_model)
+model_path = r'best_vit11.pth'
+model.load_state_dict(torch.load(model_path))
+model.eval().to('cuda' if torch.cuda.is_available() else 'cpu')
 
-def compare_faces(embedding1, embedding2, threshold=0.2):  # Adjusted threshold
-    dist = np.linalg.norm(embedding1 - embedding2)
-    return dist, dist < threshold
+# Initialize MTCNN for face detection
+mtcnn = MTCNN(keep_all=True, min_face_size=20, device='cuda' if torch.cuda.is_available() else 'cpu')
 
 def align_face(frame):
     # Convert the frame to a PIL image if it's a numpy array
@@ -46,9 +159,11 @@ def draw_bounding_box(image, box):
     draw.rectangle(box.tolist(), outline="red", width=3)
     return image
 
-def l2_normalize(tensor):
-    norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
-    return tensor / norm
+def euclidean_distance(embedding1, embedding2):
+    return np.linalg.norm(embedding1 - embedding2)
+
+def cosine_similarity(embedding1, embedding2):
+    return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
 
 def process_images(image1, image2):
     start_time = time.time()
@@ -62,20 +177,24 @@ def process_images(image1, image2):
     if face1 is None or face2 is None:
         return None, "Face not detected in one or both images."
 
-    face1 = val_test_transform(face1).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
-    face2 = val_test_transform(face2).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
+    # Use processor to preprocess the images
+    face1 = processor(images=face1, return_tensors="pt").pixel_values.to('cuda' if torch.cuda.is_available() else 'cpu')
+    face2 = processor(images=face2, return_tensors="pt").pixel_values.to('cuda' if torch.cuda.is_available() else 'cpu')
 
     with torch.no_grad():
-        embedding1 = facenet(face1).cpu().numpy()
-        embedding2 = facenet(face2).cpu().numpy()
+        embedding1 = model(face1).cpu().numpy()
+        embedding2 = model(face2).cpu().numpy()
 
-    embedding1 = l2_normalize(embedding1)
-    embedding2 = l2_normalize(embedding2)
+    # Flatten the embeddings if necessary (ensuring they are 1D)
+    embedding1 = embedding1.flatten()
+    embedding2 = embedding2.flatten()
 
-    distance, is_match = compare_faces(embedding1, embedding2, threshold=0.2)
+    euclidean_dist = euclidean_distance(embedding1, embedding2)
+    cosine_sim = cosine_similarity(embedding1, embedding2)
+    is_match = euclidean_dist < 0.2
 
     # Calculate confidence
-    confidence = max(0.0, 1.0 - distance / 1.0)  # Ensure confidence is between 0 and 1
+    confidence = max(0.0, 1.0 - euclidean_dist / 1.0)  # Ensure confidence is between 0 and 1
     print(f'confidence={confidence}')
     end_time = time.time()
     inference_time = end_time - start_time
@@ -84,7 +203,10 @@ def process_images(image1, image2):
     image1_with_box = draw_bounding_box(image1, box1)
     image2_with_box = draw_bounding_box(image2, box2)
 
-    result = f"Distance: {distance:.2f}\nMatch: {is_match}\nInference time: {inference_time:.2f} seconds"
+    result = f"Euclidean Distance: {euclidean_dist:.2f}\n"
+    # result += f"Cosine Similarity: {cosine_sim:.2f}\n"
+    result += f"Match: {is_match}\n"
+    result += f"Inference time: {inference_time:.2f} seconds"
 
     return [image1_with_box, image2_with_box], result
 
@@ -93,7 +215,7 @@ iface = gr.Interface(
     fn=process_images,
     inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
    outputs=[gr.Gallery(), gr.Textbox()],
-    title="Face Verification with FaceNet",
+    title="Face Verification with Vision Transformer",
     description="Upload two images and the model will verify if the faces in both images are of the same person."
 )
 
best_vit11.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7d41bca403110bf291cf0b40749f486072de2bb701c7749eafa4fac9eb04860
+size 347217224