add model recognition
- app.py  +99 -49
- requirements.txt  +0 -2
app.py CHANGED
@@ -1,50 +1,71 @@
-
-# import torch
-# from facenet_pytorch import MTCNN
-# import pickle
-# import cv2
-# from PIL import Image
-# import numpy as np
-# from transformers import ViTImageProcessor, ViTModel
-# import torch.nn as nn
-# from torchvision import transforms
-# from streamlit_webrtc import webrtc_streamer, VideoProcessorBase, WebRtcMode
-# import av
-
-# class ViT(nn.Module):
-#     def __init__(self, base_model):
-#         super(ViT, self).__init__()
-#         self.base_model = base_model
-
-#     def forward(self, x):
-#         x = self.base_model(x).pooler_output
-#         return x
-
-# @st.cache_resource
-# def load_model():
-#     model_name = "google/vit-base-patch16-224"
-#     processor = ViTImageProcessor.from_pretrained(model_name)
-#     base_model = ViTModel.from_pretrained("WinKawaks/vit-small-patch16-224")
-#     model = ViT(base_model)
-#     model.load_state_dict(torch.load('faceViT6.pth', map_location=torch.device('cpu')))
-#     model.eval()
-#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-#     model.to(device)
-#     return model, processor, device
-
+import streamlit as st
 import torch
 from facenet_pytorch import MTCNN
+import pickle
 import cv2
-import numpy as np
 import gradio as gr
 from PIL import Image
+import numpy as np
+from transformers import ViTImageProcessor, ViTModel
+import torch.nn as nn
+from torchvision import transforms
+
+
+# Define the ViT class
+class ViT(nn.Module):
+    def __init__(self, base_model):
+        super(ViT, self).__init__()
+        self.base_model = base_model
+
+    def forward(self, x):
+        x = self.base_model(x).pooler_output
+        return x
+
+# Load the model and processor
+model_name = "google/vit-base-patch16-224"
+processor = ViTImageProcessor.from_pretrained(model_name)
+base_model = ViTModel.from_pretrained("WinKawaks/vit-small-patch16-224")
+model = ViT(base_model)
+model.load_state_dict(torch.load('faceViT6.pth'))
+
+# Set the model to evaluation mode
+model.eval()
 
-#
+# Check if CUDA is available and move the model to GPU if it is
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model.to(device)
 
-# Initialize MTCNN
+# Initialize MTCNN for face detection
 mtcnn = MTCNN(keep_all=True, min_face_size=20, thresholds=[0.6, 0.7, 0.7], device=device)
 
+# Define the transformation
+transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor()
+])
+
+# Load the database of embeddings
+with open('face_database_ViT6.pkl', 'rb') as f:
+    database = pickle.load(f)
+
+def cosine_similarity(embedding1, embedding2):
+    similarity = torch.nn.functional.cosine_similarity(embedding1.unsqueeze(0), embedding2.unsqueeze(0))
+    return similarity.item()
+
+def compare_embeddings(embedding, database, threshold=0.9):
+    best_match = None
+    best_similarity = threshold
+    for name, db_embeddings in database.items():
+        for db_embedding in db_embeddings:
+            db_embedding = torch.tensor(db_embedding).to(device)
+            similarity = cosine_similarity(embedding, db_embedding)
+            if similarity > best_similarity:
+                best_match = name
+                best_similarity = similarity
+    if best_match is not None:
+        return best_match, best_similarity
+    return None, None
+
 def align_faces(frame, mtcnn, device):
     boxes, _ = mtcnn.detect(frame)
     aligned_faces = []
@@ -69,17 +90,46 @@ def draw_annotations(frame, detections, names=None):
 def process_image(image):
     frame = np.array(image)
     aligned_faces, boxes = align_faces(frame, mtcnn, device)
-
-
+
+    if aligned_faces is not None:
+        names = []
+        for face in aligned_faces:
+            face = transform(face)
+            face = face.unsqueeze(0).to(device)
+            with torch.no_grad():
+                embedding = model(face)
+            name, similarity = compare_embeddings(embedding, database)
+            if name is not None:
+                names.append(f"{name} ({similarity:.2f})")
+            else:
+                names.append("Unknown")
+        annotated_image = draw_annotations(frame, boxes, names)
+        result = "Face recognition complete."
+    else:
+        annotated_image = frame
+        result = "No faces detected."
+
+    return annotated_image, result
+
+def capture_and_process_image(webcam_image):
+    captured_img, result = process_image(webcam_image)
+    return captured_img, result
 
 # Create the Gradio interface
-
-
-
-
-
-
-
-
-#
-
+with gr.Blocks() as demo:
+    with gr.Row():
+        # Webcam input component
+        webcam_input = gr.Image(source="webcam", streaming=True, label="Webcam Input", height=483)
+        # Captured image display
+        captured_image = gr.Image(label="Captured Image", height=483)
+    # Capture button
+    capture_button = gr.Button("Capture Image")
+    # Result output textbox
+    result_output = gr.Textbox(label="Inference Result")
+
+    # Define the button click action
+    capture_button.click(fn=capture_and_process_image, inputs=webcam_input, outputs=[captured_image, result_output])
+
+if __name__ == "__main__":
+    # Launch the interface with share=True to create a public link
+    demo.launch(share=True, debug=True)
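Note on the embedding database: the new app.py loads face_database_ViT6.pkl, but this commit does not show how that file is built. The sketch below is one way to produce a compatible pickle, assuming only the structure the new code iterates over (a dict mapping each person's name to a list of embedding vectors) and a hypothetical enrollment_photos/<name>/*.jpg folder layout; the ViT wrapper, checkpoint, and 224x224 preprocessing mirror app.py.

# Hypothetical enrollment script (not part of this commit): builds the pickle
# that app.py expects, i.e. {person_name: [embedding, embedding, ...]}.
import os
import pickle

import torch
import torch.nn as nn
from PIL import Image
from facenet_pytorch import MTCNN
from torchvision import transforms
from transformers import ViTModel


class ViT(nn.Module):  # same wrapper as in app.py
    def __init__(self, base_model):
        super(ViT, self).__init__()
        self.base_model = base_model

    def forward(self, x):
        return self.base_model(x).pooler_output


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ViT(ViTModel.from_pretrained("WinKawaks/vit-small-patch16-224"))
model.load_state_dict(torch.load('faceViT6.pth', map_location=device))
model.eval().to(device)

mtcnn = MTCNN(keep_all=True, min_face_size=20, thresholds=[0.6, 0.7, 0.7], device=device)
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

database = {}
enroll_dir = "enrollment_photos"  # assumed layout: enrollment_photos/<name>/*.jpg
for name in os.listdir(enroll_dir):
    embeddings = []
    for fname in os.listdir(os.path.join(enroll_dir, name)):
        img = Image.open(os.path.join(enroll_dir, name, fname)).convert("RGB")
        boxes, _ = mtcnn.detect(img)  # detect the face, as align_faces does
        if boxes is None:
            continue
        x1, y1, x2, y2 = [int(v) for v in boxes[0]]
        face = transform(img.crop((x1, y1, x2, y2)))  # crop + 224x224 resize, as in app.py
        with torch.no_grad():
            emb = model(face.unsqueeze(0).to(device)).squeeze(0).cpu().numpy()
        embeddings.append(emb)  # 1-D vector; app.py rebuilds it with torch.tensor
    if embeddings:
        database[name] = embeddings

with open('face_database_ViT6.pkl', 'wb') as f:
    pickle.dump(database, f)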
requirements.txt CHANGED
@@ -2,8 +2,6 @@ torch==2.2.1
 torchaudio==2.2.1
 torchsummary==1.5.1
 torchvision==0.17.1
-streamlit==1.31.1
-streamlit-webrtc==0.47.6
 sympy==1.12
 tenacity==8.2.3
 tensorboard==2.15.2
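A quick way to sanity-check the new pipeline without launching the Gradio UI is to call process_image directly on a still photo. This is a sketch under stated assumptions: test.jpg is a hypothetical local file, and faceViT6.pth plus face_database_ViT6.pkl must be present exactly as app.py expects, since they are loaded at import time.

# Hypothetical smoke test for the recognition pipeline in app.py.
import cv2
from PIL import Image

from app import process_image  # importing app.py loads the model and the embedding database

annotated, result = process_image(Image.open("test.jpg").convert("RGB"))
print(result)  # "Face recognition complete." or "No faces detected."
cv2.imwrite("annotated.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))  # frame is RGB (from PIL)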