Spaces:
Sleeping
Sleeping
fixing recognition integration
Browse files
app.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
import torch
|
2 |
-
from
|
3 |
-
import
|
4 |
import cv2
|
|
|
5 |
import gradio as gr
|
6 |
from PIL import Image
|
7 |
-
|
8 |
from transformers import ViTImageProcessor, ViTModel
|
9 |
-
import
|
10 |
-
|
11 |
-
|
12 |
|
13 |
# Define the ViT class
|
14 |
class ViT(nn.Module):
|
@@ -87,11 +87,13 @@ def draw_annotations(frame, detections, names=None):
|
|
87 |
return frame
|
88 |
|
89 |
def process_image(image):
|
|
|
|
|
90 |
frame = np.array(image)
|
91 |
aligned_faces, boxes = align_faces(frame, mtcnn, device)
|
92 |
|
|
|
93 |
if aligned_faces is not None:
|
94 |
-
names = []
|
95 |
for face in aligned_faces:
|
96 |
face = transform(face)
|
97 |
face = face.unsqueeze(0).to(device)
|
@@ -108,27 +110,20 @@ def process_image(image):
|
|
108 |
annotated_image = frame
|
109 |
result = "No faces detected."
|
110 |
|
|
|
|
|
|
|
|
|
111 |
return annotated_image, result
|
112 |
|
113 |
-
def capture_and_process_image(webcam_image):
|
114 |
-
captured_img, result = process_image(webcam_image)
|
115 |
-
return captured_img, result
|
116 |
-
|
117 |
# Create the Gradio interface
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
# Define the button click action
|
130 |
-
capture_button.click(fn=capture_and_process_image, inputs=webcam_input, outputs=[captured_image, result_output])
|
131 |
-
|
132 |
-
if __name__ == "__main__":
|
133 |
-
# Launch the interface with share=True to create a public link
|
134 |
-
demo.launch(share=True, debug=True)
|
|
|
1 |
import torch
|
2 |
+
from torch import nn
|
3 |
+
import torchvision.transforms as transforms
|
4 |
import cv2
|
5 |
+
import numpy as np
|
6 |
import gradio as gr
|
7 |
from PIL import Image
|
8 |
+
from facenet_pytorch import MTCNN
|
9 |
from transformers import ViTImageProcessor, ViTModel
|
10 |
+
import pickle
|
11 |
+
import time
|
|
|
12 |
|
13 |
# Define the ViT class
|
14 |
class ViT(nn.Module):
|
|
|
87 |
return frame
|
88 |
|
89 |
def process_image(image):
|
90 |
+
start_time = time.time()
|
91 |
+
|
92 |
frame = np.array(image)
|
93 |
aligned_faces, boxes = align_faces(frame, mtcnn, device)
|
94 |
|
95 |
+
names = []
|
96 |
if aligned_faces is not None:
|
|
|
97 |
for face in aligned_faces:
|
98 |
face = transform(face)
|
99 |
face = face.unsqueeze(0).to(device)
|
|
|
110 |
annotated_image = frame
|
111 |
result = "No faces detected."
|
112 |
|
113 |
+
end_time = time.time()
|
114 |
+
inference_time = end_time - start_time
|
115 |
+
result += f" Inference time: {inference_time:.2f} seconds"
|
116 |
+
|
117 |
return annotated_image, result
|
118 |
|
|
|
|
|
|
|
|
|
119 |
# Create the Gradio interface
|
120 |
+
# process_image returns two values -- the annotated frame and a status
# string that includes the inference time -- so two output components are
# declared here, in the same order as that return tuple. With a single
# output component Gradio cannot map the second return value.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="numpy", label="Annotated image"),
        gr.Textbox(label="Result"),
    ],
    title="Face Detection and Recognition with MTCNN and ViT",
    description="Upload an image and the model will detect and align faces in it.",
)

# Launch the interface
# NOTE(review): this app appears to be deployed as a Hugging Face Space, where
# Gradio reports that share=True is not supported -- confirm whether the flag
# is still needed before removing it.
iface.launch(share=True, debug=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|