hasnanmr committed on
Commit 2d3c662 · 1 Parent(s): 1440546

fixing recognition integration

Files changed (1):
  1. app.py +23 -28
app.py CHANGED
@@ -1,14 +1,14 @@
 import torch
-from facenet_pytorch import MTCNN
-import pickle
+from torch import nn
+import torchvision.transforms as transforms
 import cv2
+import numpy as np
 import gradio as gr
 from PIL import Image
-import numpy as np
+from facenet_pytorch import MTCNN
 from transformers import ViTImageProcessor, ViTModel
-import torch.nn as nn
-from torchvision import transforms
-
+import pickle
+import time
 
 # Define the ViT class
 class ViT(nn.Module):
@@ -87,11 +87,13 @@ def draw_annotations(frame, detections, names=None):
     return frame
 
 def process_image(image):
+    start_time = time.time()
+
     frame = np.array(image)
     aligned_faces, boxes = align_faces(frame, mtcnn, device)
 
+    names = []
     if aligned_faces is not None:
-        names = []
         for face in aligned_faces:
             face = transform(face)
             face = face.unsqueeze(0).to(device)
@@ -108,27 +110,20 @@ def process_image(image):
         annotated_image = frame
         result = "No faces detected."
 
+    end_time = time.time()
+    inference_time = end_time - start_time
+    result += f" Inference time: {inference_time:.2f} seconds"
+
     return annotated_image, result
 
-def capture_and_process_image(webcam_image):
-    captured_img, result = process_image(webcam_image)
-    return captured_img, result
-
 # Create the Gradio interface
-with gr.Blocks() as demo:
-    with gr.Row():
-        # Webcam input component
-        webcam_input = gr.Image(source="webcam", streaming=True, label="Webcam Input", height=483)
-        # Captured image display
-        captured_image = gr.Image(label="Captured Image", height=483)
-        # Capture button
-        capture_button = gr.Button("Capture Image")
-        # Result output textbox
-        result_output = gr.Textbox(label="Inference Result")
-
-    # Define the button click action
-    capture_button.click(fn=capture_and_process_image, inputs=webcam_input, outputs=[captured_image, result_output])
-
-if __name__ == "__main__":
-    # Launch the interface with share=True to create a public link
-    demo.launch(share=True, debug=True)
+iface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.Image(type="numpy"),
+    title="Face Detection and Recognition with MTCNN and ViT",
+    description="Upload an image and the model will detect and align faces in it."
+)
+
+# Launch the interface
+iface.launch(share=True, debug=True)
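
Two notes on the new version. Hoisting names = [] out of the if aligned_faces is not None: branch keeps the list defined even on the no-face path. More importantly, process_image now returns two values (annotated_image and result, which carries the recognized names plus the inference-time string), while the gr.Interface above declares only a single gr.Image output, leaving Gradio one output component short of the function's return values, so the text result is never shown. A minimal sketch of a matching constructor, assuming the two-value return is intentional; the output labels below are illustrative, not from the commit:

# Sketch only: pair the two return values of process_image with two
# output components (gradio is already imported as gr in app.py).
iface = gr.Interface(
    fn=process_image,  # returns (annotated_image, result)
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="numpy", label="Annotated Image"),  # annotated frame
        gr.Textbox(label="Inference Result"),             # names + timing string
    ],
    title="Face Detection and Recognition with MTCNN and ViT",
    description="Upload an image and the model will detect and align faces in it."
)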