hasnanmr committed
Commit 98ffefb · 1 Parent(s): d5f0cdf

add more adjustment

Files changed (4)
  1. .gitignore +3 -2
  2. app.py +1 -1
  3. app_facevit.py +34 -64
  4. faceNet_update_transformation.pth +3 -0
.gitignore CHANGED
@@ -1,3 +1,4 @@
-app_facenet.py
+app_facevit.py
 flagged
-best_vit10.pth
+best_vit10.pth
+.gitattributes
app.py CHANGED
@@ -38,7 +38,7 @@ model.load_state_dict(torch.load(model_path))
 
 
 # Initialize MTCNN for face detection
-mtcnn = MTCNN(keep_all=True, min_face_size=12, post_process=False, device=device)
+mtcnn = MTCNN(keep_all=True, min_face_size=12, device=device)
 
 def align_face(frame):
     # Convert the frame to a PIL image if it's a numpy array
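Note on this hunk: dropping post_process=False re-enables facenet_pytorch's default post-processing, which standardizes the face crops returned by mtcnn(frame) with fixed image standardization, (x - 127.5) / 128. A minimal sketch of the difference, assuming the library's default behaviour; sample.jpg is a placeholder, not a file from this repo:

import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN, fixed_image_standardization

img = Image.open('sample.jpg')  # placeholder input image

raw_mtcnn = MTCNN(keep_all=True, min_face_size=12, post_process=False)
std_mtcnn = MTCNN(keep_all=True, min_face_size=12)  # post_process=True is the default

raw_faces = raw_mtcnn(img)  # face crops with pixel values in [0, 255]
std_faces = std_mtcnn(img)  # face crops standardized to roughly [-1, 1]

if raw_faces is not None and std_faces is not None:
    # The default pipeline should match standardizing the raw crop.
    assert np.allclose(fixed_image_standardization(raw_faces[0]).numpy(),
                       std_faces[0].numpy(), atol=1e-5)

Whatever convention the checkpoint loaded earlier in app.py was trained with, the detection output should match it, which is presumably the motivation for this change.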
app_facevit.py CHANGED
@@ -1,48 +1,32 @@
 import torch
-import torch.nn as nn
+import torchvision.transforms as transforms
 import numpy as np
-from PIL import Image, ImageDraw
-from torchvision import transforms
-from transformers import ViTImageProcessor, ViTModel
-from facenet_pytorch import MTCNN
 import gradio as gr
+from PIL import Image, ImageDraw
+from facenet_pytorch import MTCNN, InceptionResnetV1
 import time
-
-# Define the Vision Transformer (ViT) architecture
-class ViT(nn.Module):
-    def __init__(self, base_model):
-        super(ViT, self).__init__()
-        self.base_model = base_model
-        self.dropout = nn.Dropout(p=0.2)
-        self.fc = nn.Linear(base_model.config.hidden_size, 512)
-        self.dropout2 = nn.Dropout(p=0.2)
-        self.l2_norm = nn.functional.normalize
-
-    def forward(self, x):
-        x = self.base_model(x).pooler_output
-        x = self.dropout(x)
-        x = self.fc(x)
-        x = self.dropout2(x)
-        x = self.l2_norm(x, p=2, dim=1)  # Apply L2 normalization
-        return x
-
-# Load the pre-trained ViT model and processor
-model_name = "google/vit-base-patch16-224"
-processor = ViTImageProcessor.from_pretrained(model_name)
-base_model = ViTModel.from_pretrained(model_name)
-model = ViT(base_model)
-model_path = r'best_vit11.pth'
-model.load_state_dict(torch.load(model_path))
-model.eval().to('cuda' if torch.cuda.is_available() else 'cpu')
-
-# Initialize MTCNN for face detection
-mtcnn = MTCNN(keep_all=True, min_face_size=20, device='cuda' if torch.cuda.is_available() else 'cpu')
-
+# Initialize MTCNN for face detection with smaller face size detection
+mtcnn = MTCNN(keep_all=True, device='cuda' if torch.cuda.is_available() else 'cpu', min_face_size=20)
+# Load the pre-trained FaceNet model
+facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda' if torch.cuda.is_available() else 'cpu')
+model_path = r'faceNet_update_transformation.pth'
+model_state_dict = torch.load(model_path)
+facenet.load_state_dict(model_state_dict)
+facenet.eval()  # Set the model to evaluation mode
+# Define the transformation with normalization
+val_test_transform = transforms.Compose([
+    transforms.Resize((160, 160)),  # FaceNet expects 160x160 input
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
+def compare_faces(embedding1, embedding2, threshold=0.2):  # Adjusted threshold
+    dist = np.linalg.norm(embedding1 - embedding2)
+    return dist, dist < threshold
 def align_face(frame):
     # Convert the frame to a PIL image if it's a numpy array
     if isinstance(frame, np.ndarray):
         frame = Image.fromarray(frame)
-    boxes, _ = mtcnn.detect(frame)
+    boxes, _ = mtcnn.detect(frame)
     if boxes is not None and len(boxes) > 0:
         faces = mtcnn(frame)
         if faces is not None and len(faces) > 0:
@@ -51,18 +35,13 @@ def align_face(frame):
             face = transforms.ToPILImage()(face)
             return face, boxes[0]
     return None, None
-
 def draw_bounding_box(image, box):
     draw = ImageDraw.Draw(image)
     draw.rectangle(box.tolist(), outline="red", width=3)
     return image
-
-def euclidean_distance(embedding1, embedding2):
-    return np.linalg.norm(embedding1 - embedding2)
-
-def cosine_similarity(embedding1, embedding2):
-    return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
-
+def l2_normalize(tensor):
+    norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
+    return tensor / norm
 def process_images(image1, image2):
     start_time = time.time()
 
@@ -75,24 +54,20 @@ def process_images(image1, image2):
     if face1 is None or face2 is None:
         return None, "Face not detected in one or both images."
 
-    # Use processor to preprocess the images
-    face1 = processor(images=face1, return_tensors="pt").pixel_values.to('cuda' if torch.cuda.is_available() else 'cpu')
-    face2 = processor(images=face2, return_tensors="pt").pixel_values.to('cuda' if torch.cuda.is_available() else 'cpu')
+    face1 = val_test_transform(face1).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
+    face2 = val_test_transform(face2).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
 
     with torch.no_grad():
-        embedding1 = model(face1).cpu().numpy()
-        embedding2 = model(face2).cpu().numpy()
+        embedding1 = facenet(face1).cpu().numpy()
+        embedding2 = facenet(face2).cpu().numpy()
 
-    # Flatten the embeddings if necessary (ensuring they are 1D)
-    embedding1 = embedding1.flatten()
-    embedding2 = embedding2.flatten()
+    embedding1 = l2_normalize(embedding1)
+    embedding2 = l2_normalize(embedding2)
 
-    euclidean_dist = euclidean_distance(embedding1, embedding2)
-    cosine_sim = cosine_similarity(embedding1, embedding2)
-    is_match = euclidean_dist < 0.2
+    distance, is_match = compare_faces(embedding1, embedding2, threshold=0.2)
 
     # Calculate confidence
-    confidence = max(0.0, 1.0 - euclidean_dist / 1.0)  # Ensure confidence is between 0 and 1
+    confidence = max(0.0, 1.0 - distance / 1.0)  # Ensure confidence is between 0 and 1
     print(f'confidence={confidence}')
     end_time = time.time()
     inference_time = end_time - start_time
@@ -101,21 +76,16 @@ def process_images(image1, image2):
     image1_with_box = draw_bounding_box(image1, box1)
     image2_with_box = draw_bounding_box(image2, box2)
 
-    result = f"Euclidean Distance: {euclidean_dist:.2f}\n"
-    # result += f"Cosine Similarity: {cosine_sim:.2f}\n"
-    result += f"Match: {is_match}\n"
-    result += f"Inference time: {inference_time:.2f} seconds"
+    result = f"Distance: {distance:.2f}\nMatch: {is_match}\nInference time: {inference_time:.2f} seconds"
 
     return [image1_with_box, image2_with_box], result
-
 # Create the Gradio interface
 iface = gr.Interface(
     fn=process_images,
     inputs=[gr.Image(type="pil"), gr.Image(type="pil")],
     outputs=[gr.Gallery(), gr.Textbox()],
-    title="Face Verification with Vision Transformer",
+    title="Face Verification with FaceNet",
     description="Upload two images and the model will verify if the faces in both images are of the same person."
 )
-
 # Launch the interface
 iface.launch(share=True, debug=True)
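A quick sanity check of the matching logic added above, restating l2_normalize and compare_faces from the new version of the file and using random stand-in embeddings (InceptionResnetV1 outputs 512-dimensional vectors): once embeddings are L2-normalized, squared Euclidean distance and cosine similarity are tied by d^2 = 2 * (1 - cos), so the 0.2 distance threshold is equivalent to requiring cosine similarity above 0.98.

import numpy as np

def l2_normalize(tensor):
    norm = np.linalg.norm(tensor, ord=2, axis=1, keepdims=True)
    return tensor / norm

def compare_faces(embedding1, embedding2, threshold=0.2):
    dist = np.linalg.norm(embedding1 - embedding2)
    return dist, dist < threshold

rng = np.random.default_rng(0)
e1 = l2_normalize(rng.normal(size=(1, 512)))               # stand-in for a FaceNet embedding
e2 = l2_normalize(e1 + 0.005 * rng.normal(size=(1, 512)))  # slightly perturbed copy

dist, is_match = compare_faces(e1, e2)
cos = float((e1 * e2).sum())
assert np.isclose(dist ** 2, 2 * (1 - cos))  # identity for unit vectors
print(f'distance={dist:.3f} cosine={cos:.3f} match={is_match}')

One side effect of the normalization: distances between unit vectors lie in [0, 2], so the confidence formula max(0.0, 1.0 - distance / 1.0) in process_images only discriminates down to distance 1; anything farther apart is clamped to a confidence of 0.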
faceNet_update_transformation.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86b4b567798373e423655892a9a377038d2cfae87bbb073d3d9ae83b93a94081
+size 112028666
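The checkpoint is tracked with Git LFS, so the blob committed here is only the three-line pointer shown above; the actual ~112 MB state dict lives in LFS storage. A defensive sketch, not part of this commit, that fails fast if the repo was cloned without git lfs pull (otherwise torch.load would choke on the pointer text with a confusing unpickling error):

from pathlib import Path

import torch

model_path = Path('faceNet_update_transformation.pth')
with open(model_path, 'rb') as f:
    header = f.read(40)  # enough to see the LFS pointer signature
if header.startswith(b'version https://git-lfs'):
    raise RuntimeError(
        f'{model_path} is a Git LFS pointer, not model weights; '
        'run `git lfs pull` to download the checkpoint.'
    )
state_dict = torch.load(model_path, map_location='cpu')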