hasnanmr committed on
Commit
f7f224b
1 Parent(s): 39656ac

modify verification app

Browse files
Files changed (1) hide show
  1. app.py +33 -66
app.py CHANGED
@@ -7,7 +7,6 @@ import gradio as gr
7
  from PIL import Image
8
  from facenet_pytorch import MTCNN
9
  from transformers import ViTImageProcessor, ViTModel
10
- import pickle
11
  import time
12
 
13
  # Define the ViT class
@@ -43,91 +42,59 @@ transform = transforms.Compose([
43
  transforms.ToTensor()
44
  ])
45
 
46
- # Load the database of embeddings
47
- with open('face_database_me.pkl', 'rb') as f:
48
- database = pickle.load(f)
49
-
50
  def cosine_similarity(embedding1, embedding2):
51
  similarity = torch.nn.functional.cosine_similarity(embedding1.flatten().unsqueeze(0), embedding2.flatten().unsqueeze(0))
52
  return similarity.item()
53
 
54
- def compare_embeddings(embedding, database, threshold=0.9):
55
- best_match = None
56
- best_similarity = threshold
57
- for name, db_embeddings in database.items():
58
- for db_embedding in db_embeddings:
59
- db_embedding = torch.tensor(db_embedding).to(device)
60
- similarity = cosine_similarity(embedding, db_embedding)
61
- if similarity > best_similarity:
62
- best_match = name
63
- best_similarity = similarity
64
- if best_match is not None:
65
- return best_match, best_similarity
66
- return None, None
67
-
68
- def align_faces(frame):
69
  # Convert the frame to a PIL image if it's a numpy array
70
  if isinstance(frame, np.ndarray):
71
  frame = Image.fromarray(frame)
72
  boxes, _ = mtcnn.detect(frame)
73
- aligned_faces = []
74
- if boxes is not None:
75
  faces = mtcnn(frame)
76
- if faces is not None:
77
- for face in faces:
78
- # Convert the face tensor to PIL Image
79
- face = transforms.ToPILImage()(face)
80
- aligned_faces.append(face)
81
- return aligned_faces, boxes
82
 
83
- def draw_annotations(frame, detections, names=None):
84
- if detections is None:
85
- return frame
86
- if names is None:
87
- names = ["Unknown"] * len(detections)
88
- for i, detection in enumerate(detections):
89
- x1, y1, x2, y2 = map(int, detection)
90
- cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
91
- if names[i]:
92
- cv2.putText(frame, names[i], (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
93
- return frame
94
-
95
- def process_image(image):
96
  start_time = time.time()
97
 
98
- frame = np.array(image)
99
- aligned_faces, boxes = align_faces(frame)
 
 
 
100
 
101
- names = []
102
- if aligned_faces is not None:
103
- for face in aligned_faces:
104
- face = transform(face).unsqueeze(0).to(device)
105
- with torch.no_grad():
106
- embedding = model(face)
107
- name, similarity = compare_embeddings(embedding, database)
108
- if name is not None:
109
- names.append(f"{name} ({similarity:.2f})")
110
- else:
111
- names.append("Unknown")
112
- annotated_image = draw_annotations(frame, boxes, names)
113
- result = "Face recognition complete."
114
- else:
115
- annotated_image = frame
116
- result = "No faces detected."
117
 
118
  end_time = time.time()
119
  inference_time = end_time - start_time
120
- result += f" Inference time: {inference_time:.2f} seconds"
121
 
122
- return annotated_image, result
 
 
123
 
124
  # Create the Gradio interface
125
  iface = gr.Interface(
126
- fn=process_image,
127
- inputs=gr.Image(type="pil"), # Ensure the input type matches what the function expects
128
- outputs=[gr.Image(type="numpy"), gr.Textbox()],
129
- title="Face Detection and Recognition with MTCNN and ViT",
130
- description="Upload an image and the model will detect and recognize faces in it."
131
  )
132
 
133
  # Launch the interface
 
7
  from PIL import Image
8
  from facenet_pytorch import MTCNN
9
  from transformers import ViTImageProcessor, ViTModel
 
10
  import time
11
 
12
  # Define the ViT class
 
42
  transforms.ToTensor()
43
  ])
44
 
 
 
 
 
45
def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embeddings as a Python float.

    Each tensor is collapsed to a single row vector before comparison, so the
    inputs may arrive in any shape (e.g. with a leading batch dimension of 1).
    """
    row1 = embedding1.reshape(1, -1)
    row2 = embedding2.reshape(1, -1)
    score = torch.nn.functional.cosine_similarity(row1, row2)
    return score.item()
48
 
49
def align_face(frame):
    """Detect a face in *frame* and return its aligned crop as a PIL image.

    Args:
        frame: A PIL image or a numpy array; arrays are converted to a PIL
            image before detection.

    Returns:
        The first face produced by the module-level ``mtcnn`` converted to a
        PIL image, or ``None`` when no face is found.
    """
    if isinstance(frame, np.ndarray):
        frame = Image.fromarray(frame)

    # A single forward pass is enough: calling the MTCNN module already runs
    # detection and yields None when nothing is found, so a separate
    # mtcnn.detect() pre-check only repeats the same work.
    faces = mtcnn(frame)
    if faces is None or len(faces) == 0:
        return None

    # NOTE(review): faces[0] is one face only if mtcnn was built with
    # keep_all=True; with keep_all=False the module returns a single
    # (C, H, W) tensor and [0] would select a channel -- confirm the
    # constructor arguments.
    # NOTE(review): if mtcnn uses post_process=True the crop is standardised
    # rather than in [0, 1], which ToPILImage does not round-trip -- confirm.
    return transforms.ToPILImage()(faces[0])
62
 
63
def process_images(image1, image2):
    """Compare the faces found in two images and report their similarity.

    Args:
        image1: First input image (the interface supplies PIL images), or
            ``None`` when nothing was uploaded.
        image2: Second input image, same conventions as ``image1``.

    Returns:
        A ``(images, message)`` pair. ``images`` is ``(frame1, frame2)`` as
        numpy arrays on success and ``None`` when an input is missing or a
        face cannot be found; ``message`` is the text shown to the user.
    """
    # An empty upload slot arrives as None; bail out before it reaches
    # np.array / Image.fromarray, which would otherwise raise.
    if image1 is None or image2 is None:
        return None, "Please upload two images."

    start_time = time.time()

    frame1 = np.array(image1)
    frame2 = np.array(image2)

    face1 = align_face(frame1)
    face2 = align_face(frame2)
    if face1 is None or face2 is None:
        return None, "Face not detected in one or both images."

    # Apply the shared preprocessing and add a batch dimension of 1.
    face1 = transform(face1).unsqueeze(0).to(device)
    face2 = transform(face2).unsqueeze(0).to(device)

    # Inference only: no gradients are needed for the embeddings.
    with torch.no_grad():
        embedding1 = model(face1)
        embedding2 = model(face2)

    similarity = cosine_similarity(embedding1, embedding2)

    inference_time = time.time() - start_time
    result = f"Similarity: {similarity:.2f}\nInference time: {inference_time:.2f} seconds"

    return (frame1, frame2), result
90
 
91
  # Create the Gradio interface
92
# Two-image verification UI: both uploads are passed to process_images as PIL
# images, and its two return values fill a gallery and a text box.
iface = gr.Interface(
    title="Face Verification with MTCNN and ViT",
    description="Upload two images and the model will verify if the faces in both images are of the same person.",
    fn=process_images,
    inputs=[
        gr.Image(type="pil"),
        gr.Image(type="pil"),
    ],
    outputs=[
        gr.Gallery(),
        gr.Textbox(),
    ],
)
99
 
100
  # Launch the interface