Spaces:

hasnanmr
/

recognition_anti-spoofing

Sleeping

App Files Files Community

hasnanmr committed on Jul 22, 2024

Commit

d5f0cdf

1 Parent(s): 5587663

add another model state of vit

Browse files

Files changed (1) hide show

app_facevit.py +121 -0

app_facevit.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import torch
+import torch.nn as nn
+import numpy as np
+from PIL import Image, ImageDraw
+from torchvision import transforms
+from transformers import ViTImageProcessor, ViTModel
+from facenet_pytorch import MTCNN
+import gradio as gr
+import time
+# Define the Vision Transformer (ViT) architecture
class ViT(nn.Module):
    """Backbone wrapper that projects pooled features to a unit-length 512-d embedding.

    ``base_model`` must expose ``config.hidden_size`` and, when called on the
    input, return an object with a ``pooler_output`` attribute.
    """

    def __init__(self, base_model):
        super().__init__()
        hidden_size = base_model.config.hidden_size
        self.base_model = base_model
        self.dropout = nn.Dropout(p=0.2)
        self.fc = nn.Linear(hidden_size, 512)
        self.dropout2 = nn.Dropout(p=0.2)
        # Stored as an attribute so forward() reads as a sequence of stages.
        self.l2_norm = nn.functional.normalize

    def forward(self, x):
        """Return the L2-normalised embedding for each row of the pooled output."""
        pooled = self.base_model(x).pooler_output
        projected = self.fc(self.dropout(pooled))
        regularised = self.dropout2(projected)
        # Normalise along dim 1 so every embedding has unit L2 norm.
        return self.l2_norm(regularised, p=2, dim=1)
# Resolve the target device once so the embedding model and the face
# detector are guaranteed to end up on the same device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the pre-trained ViT model and processor
model_name = "google/vit-base-patch16-224"
processor = ViTImageProcessor.from_pretrained(model_name)
base_model = ViTModel.from_pretrained(model_name)
model = ViT(base_model)
model_path = r'best_vit11.pth'
# map_location remaps the stored tensors onto `device`; without it a
# checkpoint saved from a GPU run fails to deserialize on a CPU-only host.
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval().to(device)

# Initialize MTCNN for face detection
mtcnn = MTCNN(keep_all=True, min_face_size=20, device=device)
def align_face(frame):
    """Detect faces in ``frame`` and return the first face crop with its box.

    Args:
        frame: A PIL image or a numpy array (arrays are converted to PIL).

    Returns:
        ``(face, box)`` where ``face`` is a PIL image built from the first
        tensor returned by ``mtcnn`` and ``box`` is the first entry of the
        detected boxes, or ``(None, None)`` when nothing is detected.
    """
    if isinstance(frame, np.ndarray):
        frame = Image.fromarray(frame)

    detected_boxes, _ = mtcnn.detect(frame)
    if detected_boxes is None or len(detected_boxes) == 0:
        return None, None

    # NOTE(review): mtcnn is invoked a second time here to obtain the crops;
    # presumably this repeats the detection pass - confirm before optimising.
    crops = mtcnn(frame)
    if crops is None or len(crops) == 0:
        return None, None

    # Convert the first face tensor to a PIL image for the ViT processor.
    to_pil = transforms.ToPILImage()
    return to_pil(crops[0]), detected_boxes[0]
def draw_bounding_box(image, box):
    """Return a copy of ``image`` with ``box`` outlined in red.

    Args:
        image: PIL image to annotate. It is left unmodified so the caller's
            original stays clean if it is reused.
        box: Object with a ``tolist()`` method yielding the rectangle
            coordinates passed to ``ImageDraw.rectangle``.

    Returns:
        A new PIL image carrying a red rectangle of width 3.
    """
    annotated = image.copy()
    ImageDraw.Draw(annotated).rectangle(box.tolist(), outline="red", width=3)
    return annotated
def euclidean_distance(embedding1, embedding2):
    """Return the L2 norm of the difference between the two embeddings."""
    difference = embedding1 - embedding2
    return np.linalg.norm(difference)
def cosine_similarity(embedding1, embedding2):
    """Return the cosine of the angle between two 1-D embeddings.

    Args:
        embedding1: First vector.
        embedding2: Second vector, same length as ``embedding1``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm (the quotient is undefined there and would otherwise be NaN).
    """
    norm_product = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
    if norm_product == 0:
        return 0.0
    return np.dot(embedding1, embedding2) / norm_product
def process_images(image1, image2):
    """Compare the faces found in two images and report whether they match.

    Args:
        image1: First PIL image, or ``None`` when no image was supplied.
        image2: Second PIL image, or ``None`` when no image was supplied.

    Returns:
        ``([annotated1, annotated2], report)`` on success, where each image
        has its detected face boxed and ``report`` lists the Euclidean
        distance, the match verdict and the inference time.
        ``(None, message)`` when an image is missing or no face is detected.
    """
    # A request can arrive with an empty image slot; answer with a message
    # instead of failing inside the array conversion below.
    if image1 is None or image2 is None:
        return None, "Please provide two images."

    start_time = time.time()

    face1, box1 = align_face(np.array(image1))
    face2, box2 = align_face(np.array(image2))
    if face1 is None or face2 is None:
        return None, "Face not detected in one or both images."

    # Feed the inputs to whichever device the model's weights live on.
    target_device = next(model.parameters()).device
    pixels1 = processor(images=face1, return_tensors="pt").pixel_values.to(target_device)
    pixels2 = processor(images=face2, return_tensors="pt").pixel_values.to(target_device)

    with torch.no_grad():
        # Flatten so the distance is computed between 1-D vectors.
        embedding1 = model(pixels1).cpu().numpy().flatten()
        embedding2 = model(pixels2).cpu().numpy().flatten()

    euclidean_dist = euclidean_distance(embedding1, embedding2)
    is_match = euclidean_dist < 0.2

    # Clamp at 0 so the confidence stays within [0, 1].
    confidence = max(0.0, 1.0 - euclidean_dist)
    print(f'confidence={confidence}')

    inference_time = time.time() - start_time

    # Draw bounding boxes on the original images
    image1_with_box = draw_bounding_box(image1, box1)
    image2_with_box = draw_bounding_box(image2, box2)

    result = f"Euclidean Distance: {euclidean_dist:.2f}\n"
    result += f"Match: {is_match}\n"
    result += f"Inference time: {inference_time:.2f} seconds"
    return [image1_with_box, image2_with_box], result
# Build the Gradio UI: two PIL image inputs, with a gallery and a text box
# receiving the pair of values returned by process_images.
iface = gr.Interface(
    title="Face Verification with Vision Transformer",
    description="Upload two images and the model will verify if the faces in both images are of the same person.",
    fn=process_images,
    inputs=[gr.Image(type="pil") for _ in range(2)],
    outputs=[gr.Gallery(), gr.Textbox()],
)

# Start serving the interface with sharing and debug mode switched on.
iface.launch(debug=True, share=True)