hasnanmr committed
Commit 98ffefb · 1 Parent(s): d5f0cdf

add more adjustment

Files changed (4)
  1. .gitignore +3 -2
  2. app.py +1 -1
  3. app_facevit.py +34 -64
  4. faceNet_update_transformation.pth +3 -0
.gitignore CHANGED
@@ -1,3 +1,4 @@
-app_facenet.py
+app_facevit.py
 flagged
-best_vit10.pth
+best_vit10.pth
+.gitattributes
app.py CHANGED
@@ -38,7 +38,7 @@ model.load_state_dict(torch.load(model_path))
 
 
 # Initialize MTCNN for face detection
-mtcnn = MTCNN(keep_all=True, min_face_size=12, post_process=False, device=device)
+mtcnn = MTCNN(keep_all=True, min_face_size=12, device=device)
 
 def align_face(frame):
     # Convert the frame to a PIL image if it's a numpy array
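Note on this hunk: dropping post_process=False re-enables facenet_pytorch's default post-processing, which standardizes the face crops returned by mtcnn(frame) with fixed image standardization, (x - 127.5) / 128. A minimal sketch of the difference, assuming the library's default behaviour; sample.jpg is a placeholder, not a file from this repo:

import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN, fixed_image_standardization

img = Image.open('sample.jpg')  # placeholder input image

raw_mtcnn = MTCNN(keep_all=True, min_face_size=12, post_process=False)
std_mtcnn = MTCNN(keep_all=True, min_face_size=12)  # post_process=True is the default

raw_faces = raw_mtcnn(img)  # face crops with pixel values in [0, 255]
std_faces = std_mtcnn(img)  # face crops standardized to roughly [-1, 1]

if raw_faces is not None and std_faces is not None:
    # The default pipeline should match standardizing the raw crop.
    assert np.allclose(fixed_image_standardization(raw_faces[0]).numpy(),
                       std_faces[0].numpy(), atol=1e-5)

Whatever convention the checkpoint loaded earlier in app.py was trained with, the detection output should match it, which is presumably the motivation for this change.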
app_facevit.py CHANGED
@@ -1,48 +1,32 @@
 import torch
-import torch.nn as nn
+import torchvision.transforms as transforms
 import numpy as np
-from PIL import Image, ImageDraw
-from torchvision import transforms
-from transformers import ViTImageProcessor, ViTModel
-from facenet_pytorch import MTCNN
 import gradio as gr
+from PIL import Image, ImageDraw
+from facenet_pytorch import MTCNN, InceptionResnetV1
 import time
-
-# Define the Vision Transformer (ViT) architecture
-class ViT(nn.Module):
-    def __init__(self, base_model):
-        super(ViT, self).__init__()
-        self.base_model = base_model
-        self.dropout = nn.Dropout(p=0.2)
-        self.fc = nn.Linear(base_model.config.hidden_size, 512)
-        self.dropout2 = nn.Dropout(p=0.2)
-        self.l2_norm = nn.functional.normalize
-
-    def forward(self, x):
-        x = self.base_model(x).pooler_output
-        x = self.dropout(x)
-        x = self.fc(x)
-        x = self.dropout2(x)
-        x = self.l2_norm(x, p=2, dim=1)  # Apply L2 normalization
-        return x
-
-# Load the pre-trained ViT model and processor
-model_name = "google/vit-base-patch16-224"
-processor = ViTImageProcessor.from_pretrained(model_name)
-base_model = ViTModel.from_pretrained(model_name)
-model = ViT(base_model)
-model_path = r'best_vit11.pth'
-model.load_state_dict(torch.load(model_path))
-model.eval().to('cuda' if torch.cuda.is_available() else 'cpu')
-
-# Initialize MTCNN for face detection
-mtcnn = MTCNN(keep_all=True, min_face_size=20, device='cuda' if torch.cuda.is_available() else 'cpu')
-
+# Initialize MTCNN for face detection with smaller face size detection
+mtcnn = MTCNN(keep_all=True, device='cuda' if torch.cuda.is_available() else 'cpu', min_face_size=20)
+# Load the pre-trained FaceNet model
+facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda' if torch.cuda.is_available() else 'cpu')
+model_path = r'faceNet_update_transformation.pth'
+model_state_dict = torch.load(model_path)
+facenet.load_state_dict(model_state_dict)
+facenet.eval()  # Set the model to evaluation mode
+# Define the transformation with normalization
+val_test_transform = transforms.Compose([
+    transforms.Resize((160, 160)),  # FaceNet expects 160x160 input
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
+def compare_faces(embedding1, embedding2, threshold=0.2):  # Adjusted threshold
+    dist = np.linalg.norm(embedding1 - embedding2)
+    return dist, dist < threshold
 def align_face(frame):
     # Convert the frame to a PIL image if it's a numpy array
     if isinstance(frame, np.ndarray):
         frame = Image.fromarray(frame)
-    boxes, _ = mtcnn.detect(frame)
+    boxes, _ = mtcnn.detect(frame)
     if boxes is not None and len(boxes) > 0:
         faces = mtcnn(frame)
         if faces is not None and len(faces) > 0:
@@ -51,18 +35,13 @@ def align_face(frame):
             face = transforms.ToPILImage()(face)
             return face, boxes[0]
     return None, None
-
 def draw_bounding_box(image, box):
     draw = ImageDraw.Draw(image)
     draw.rectangle(box.tolist(), outline="red", width=3)
     return image
-
-def euclidean_distance(embedding1, embedding2):
-    return np.linalg.norm(embedding1 - embedding2)
-
-def cosine_similarity(embedding1, embedding2):
-    return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
-
+def l2_normalize(tensor):
+    norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
+    return tensor / norm
 def process_images(image1, image2):
     start_time = time.time()
 
@@ -75,24 +54,20 @@ def process_images(image1, image2):
     if face1 is None or face2 is None:
         return None, "Face not detected in one or both images."
 
-    # Use processor to preprocess the images
-    face1 = processor(images=face1, return_tensors="pt").pixel_values.to('cuda' if torch.cuda.is_available() else 'cpu')
-    face2 = processor(images=face2, return_tensors="pt").pixel_values.to('cuda' if torch.cuda.is_available() else 'cpu')
+    face1 = val_test_transform(face1).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
+    face2 = val_test_transform(face2).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
 
     with torch.no_grad():
-        embedding1 = model(face1).cpu().numpy()
-        embedding2 = model(face2).cpu().numpy()
+        embedding1 = facenet(face1).cpu().numpy()
+        embedding2 = facenet(face2).cpu().numpy()
 
-    # Flatten the embeddings if necessary (ensuring they are 1D)
-    embedding1 = embedding1.flatten()
-    embedding2 = embedding2.flatten()
+    embedding1 = l2_normalize(embedding1)
+    embedding2 = l2_normalize(embedding2)
 
-    euclidean_dist = euclidean_distance(embedding1, embedding2)
-    cosine_sim = cosine_similarity(embedding1, embedding2)
-    is_match = euclidean_dist < 0.2
+    distance, is_match = compare_faces(embedding1, embedding2, threshold=0.2)
 
     # Calculate confidence
-    confidence = max(0.0, 1.0 - euclidean_dist / 1.0)  # Ensure confidence is between 0 and 1
+    confidence = max(0.0, 1.0 - distance / 1.0)  # Ensure confidence is between 0 and 1
     print(f'confidence={confidence}')
     end_time = time.time()
     inference_time = end_time - start_time
@@ -101,21 +76,16 @@ def process_images(image1, image2):
     image1_with_box = draw_bounding_box(image1, box1)
     image2_with_box = draw_bounding_box(image2, box2)
 
-    result = f"Euclidean Distance: {euclidean_dist:.2f}\n"
-    # result += f"Cosine Similarity: {cosine_sim:.2f}\n"
-    result += f"Match: {is_match}\n"
-    result += f"Inference time: {inference_time:.2f} seconds"
+    result = f"Distance: {distance:.2f}\nMatch: {is_match}\nInference time: {inference_time:.2f} seconds"
 
     return [image1_with_box, image2_with_box], result
-
 # Create the Gradio interface
 iface = gr.Interface(
     fn=process_images,
     inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
     outputs=[gr.Gallery(), gr.Textbox()],
-    title="Face Verification with Vision Transformer",
+    title="Face Verification with FaceNet",
     description="Upload two images and the model will verify if the faces in both images are of the same person."
 )
-
 # Launch the interface
 iface.launch(share=True, debug=True)
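A quick sanity check of the matching logic added above, restating l2_normalize and compare_faces from the new version of the file and using random stand-in embeddings (InceptionResnetV1 outputs 512-dimensional vectors): once embeddings are L2-normalized, squared Euclidean distance and cosine similarity are tied by d^2 = 2 * (1 - cos), so the 0.2 distance threshold is equivalent to requiring cosine similarity above 0.98.

import numpy as np

def l2_normalize(tensor):
    norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
    return tensor / norm

def compare_faces(embedding1, embedding2, threshold=0.2):
    dist = np.linalg.norm(embedding1 - embedding2)
    return dist, dist < threshold

rng = np.random.default_rng(0)
e1 = l2_normalize(rng.normal(size=(1, 512)))               # stand-in for a FaceNet embedding
e2 = l2_normalize(e1 + 0.005 * rng.normal(size=(1, 512)))  # slightly perturbed copy

dist, is_match = compare_faces(e1, e2)
cos = float((e1 * e2).sum())
assert np.isclose(dist ** 2, 2 * (1 - cos))  # identity for unit vectors
print(f'distance={dist:.3f} cosine={cos:.3f} match={is_match}')

One side effect of the normalization: distances between unit vectors lie in [0, 2], so the confidence formula max(0.0, 1.0 - distance / 1.0) in process_images only discriminates down to distance 1; anything farther apart is clamped to a confidence of 0.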
faceNet_update_transformation.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86b4b567798373e423655892a9a377038d2cfae87bbb073d3d9ae83b93a94081
+size 112028666
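The checkpoint is tracked with Git LFS, so the blob committed here is only the three-line pointer shown above; the actual ~112 MB state dict lives in LFS storage. A defensive sketch, not part of this commit, that fails fast if the repo was cloned without git lfs pull (otherwise torch.load would choke on the pointer text with a confusing unpickling error):

from pathlib import Path

import torch

model_path = Path('faceNet_update_transformation.pth')
with open(model_path, 'rb') as f:
    header = f.read(40)  # enough to see the LFS pointer signature
if header.startswith(b'version https://git-lfs'):
    raise RuntimeError(
        f'{model_path} is a Git LFS pointer, not model weights; '
        'run `git lfs pull` to download the checkpoint.'
    )
state_dict = torch.load(model_path, map_location='cpu')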