modify verification app
app.py
CHANGED
@@ -7,7 +7,6 @@ import gradio as gr
 from PIL import Image
 from facenet_pytorch import MTCNN
 from transformers import ViTImageProcessor, ViTModel
-import pickle
 import time
 
 # Define the ViT class
@@ -43,91 +42,59 @@ transform = transforms.Compose([
     transforms.ToTensor()
 ])
 
-# Load the database of embeddings
-with open('face_database_me.pkl', 'rb') as f:
-    database = pickle.load(f)
-
 def cosine_similarity(embedding1, embedding2):
     similarity = torch.nn.functional.cosine_similarity(embedding1.flatten().unsqueeze(0), embedding2.flatten().unsqueeze(0))
     return similarity.item()
 
-def ...
-    best_match = None
-    best_similarity = threshold
-    for name, db_embeddings in database.items():
-        for db_embedding in db_embeddings:
-            db_embedding = torch.tensor(db_embedding).to(device)
-            similarity = cosine_similarity(embedding, db_embedding)
-            if similarity > best_similarity:
-                best_match = name
-                best_similarity = similarity
-    if best_match is not None:
-        return best_match, best_similarity
-    return None, None
-
-def align_faces(frame):
+def align_face(frame):
     # Convert the frame to a PIL image if it's a numpy array
     if isinstance(frame, np.ndarray):
         frame = Image.fromarray(frame)
     boxes, _ = mtcnn.detect(frame)
-
-    if boxes is not None:
+    if boxes is not None and len(boxes) > 0:
         faces = mtcnn(frame)
-        if faces is not None:
-            ...
-        return ...
+        if faces is not None and len(faces) > 0:
+            face = faces[0]
+            # Convert the face tensor to PIL Image
+            face = transforms.ToPILImage()(face)
+            return face
+    return None
 
-def draw_annotations(frame, detections, names=None):
-    if detections is None:
-        return frame
-    if names is None:
-        names = ["Unknown"] * len(detections)
-    for i, detection in enumerate(detections):
-        x1, y1, x2, y2 = map(int, detection)
-        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-        if names[i]:
-            cv2.putText(frame, names[i], (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
-    return frame
-
-def process_image(image):
+def process_images(image1, image2):
     start_time = time.time()
 
-    ...
-        annotated_image = draw_annotations(frame, boxes, names)
-        result = "Face recognition complete."
-    else:
-        annotated_image = frame
-        result = "No faces detected."
+    frame1 = np.array(image1)
+    frame2 = np.array(image2)
+
+    face1 = align_face(frame1)
+    face2 = align_face(frame2)
+
+    if face1 is None or face2 is None:
+        return None, "Face not detected in one or both images."
+
+    face1 = transform(face1).unsqueeze(0).to(device)
+    face2 = transform(face2).unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        embedding1 = model(face1)
+        embedding2 = model(face2)
+
+    similarity = cosine_similarity(embedding1, embedding2)
 
     end_time = time.time()
     inference_time = end_time - start_time
-    result += f" Inference time: {inference_time:.2f} seconds"
 
-    ...
+    result = f"Similarity: {similarity:.2f}\nInference time: {inference_time:.2f} seconds"
+
+    return (frame1, frame2), result
 
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil"),
-    outputs=[gr. ...
-    title="Face ...
-    description="Upload ...
+    fn=process_images,
+    inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
+    outputs=[gr.Gallery(), gr.Textbox()],
+    title="Face Verification with MTCNN and ViT",
+    description="Upload two images and the model will verify if the faces in both images are of the same person."
 )
 
 # Launch the interface
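For reference, cosine_similarity flattens each embedding and adds a batch dimension, so torch.nn.functional.cosine_similarity compares the two inputs as (1, N) row vectors along dim=1. A minimal standalone check with toy tensors (values chosen only for illustration):

import torch

a = torch.tensor([1.0, 0.0, 1.0])
b = torch.tensor([1.0, 0.0, 0.0])

# flatten().unsqueeze(0) turns each embedding into a (1, N) row vector,
# so the similarity is computed along dim=1 regardless of original shape.
sim = torch.nn.functional.cosine_similarity(
    a.flatten().unsqueeze(0), b.flatten().unsqueeze(0)
)
print(sim.item())  # ~0.7071 for these toy vectors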
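align_face indexes faces[0], which only works if the MTCNN instance returns a stacked tensor of all detected faces. That points to keep_all=True in the detector setup, which sits outside this hunk; a sketch of what that setup presumably looks like (the arguments are assumptions, not shown in the diff):

import torch
from facenet_pytorch import MTCNN

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# keep_all=True makes mtcnn(frame) return one cropped face tensor per
# detection, which is what lets align_face pick faces[0].
# Assumed setup; the actual line is outside the changed hunk.
mtcnn = MTCNN(keep_all=True, device=device)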
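The new two-image flow can also be exercised outside Gradio once app.py's globals (mtcnn, model, transform, device) are in scope; the file names below are placeholders:

from PIL import Image

img1 = Image.open("person_a.jpg").convert("RGB")  # placeholder path
img2 = Image.open("person_b.jpg").convert("RGB")  # placeholder path

images, result = process_images(img1, img2)
print(result)

On success, images is the (frame1, frame2) pair that gr.Gallery displays side by side; when a face is missing, process_images returns None for the gallery along with an explanatory message.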
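Note that the rewritten app reports the raw similarity score and leaves the same-person decision to the user, whereas the deleted lookup code compared scores against a threshold. If a verdict string is wanted, a sketch along those lines (the 0.6 cutoff is an assumption to tune on known same/different pairs, not a value from this diff):

THRESHOLD = 0.6  # assumed cutoff; tune on labeled image pairs

def verdict(similarity):
    # Higher cosine similarity means the two embeddings are closer.
    return "Same person" if similarity >= THRESHOLD else "Different people"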
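The hunk's trailing context stops at the # Launch the interface comment, so the launch call itself is not part of the diff; by Gradio convention it would be:

iface.launch()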