David Driscoll
committed on
Commit
·
4a53aae
1
Parent(s):
74d3d77
Overhaul lag reduction
Browse files
app.py
CHANGED
@@ -9,9 +9,16 @@ import mediapipe as mp
|
|
9 |
from fer import FER # Facial emotion recognition
|
10 |
|
11 |
# -----------------------------
|
12 |
-
# Configuration
|
13 |
# -----------------------------
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# -----------------------------
|
17 |
# Global caches for overlay info and frame counters
|
@@ -34,16 +41,18 @@ face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
|
|
34 |
object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
|
35 |
weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
|
36 |
)
|
37 |
-
object_detection_model.eval()
|
|
|
38 |
obj_transform = transforms.Compose([transforms.ToTensor()])
|
39 |
|
|
|
|
|
40 |
emotion_detector = FER(mtcnn=True)
|
41 |
|
42 |
# -----------------------------
|
43 |
-
#
|
44 |
# -----------------------------
|
45 |
def draw_posture_overlay(raw_frame, landmarks):
|
46 |
-
# Draw each landmark as a small circle
|
47 |
for (x, y) in landmarks:
|
48 |
cv2.circle(raw_frame, (x, y), 4, (0, 255, 0), -1)
|
49 |
return raw_frame
|
@@ -55,27 +64,42 @@ def draw_boxes_overlay(raw_frame, boxes, color):
|
|
55 |
|
56 |
# -----------------------------
|
57 |
# Heavy (Synchronous) Detection Functions
|
58 |
-
# These functions compute the overlay info on the current frame.
|
59 |
# -----------------------------
|
60 |
def compute_posture_overlay(image):
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
if pose_results.pose_landmarks:
|
66 |
landmarks = []
|
67 |
for lm in pose_results.pose_landmarks.landmark:
|
68 |
-
|
|
|
|
|
|
|
69 |
text = "Posture detected"
|
70 |
else:
|
71 |
landmarks = []
|
72 |
text = "No posture detected"
|
|
|
73 |
return landmarks, text
|
74 |
|
75 |
def compute_emotion_overlay(image):
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
79 |
if emotions:
|
80 |
top_emotion, score = max(emotions[0]["emotions"].items(), key=lambda x: x[1])
|
81 |
text = f"{top_emotion} ({score:.2f})"
|
@@ -84,33 +108,48 @@ def compute_emotion_overlay(image):
|
|
84 |
return text
|
85 |
|
86 |
def compute_objects_overlay(image):
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
91 |
with torch.no_grad():
|
92 |
detections = object_detection_model([img_tensor])[0]
|
|
|
93 |
threshold = 0.8
|
94 |
boxes = []
|
95 |
for box, score in zip(detections["boxes"], detections["scores"]):
|
96 |
if score > threshold:
|
|
|
|
|
97 |
boxes.append(tuple(box.int().cpu().numpy()))
|
|
|
98 |
text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
|
99 |
return boxes, text
|
100 |
|
101 |
def compute_faces_overlay(image):
|
102 |
-
|
103 |
-
h, w, _ =
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
106 |
boxes = []
|
107 |
if face_results.detections:
|
108 |
for detection in face_results.detections:
|
109 |
bbox = detection.location_data.relative_bounding_box
|
110 |
-
x = int(bbox.xmin *
|
111 |
-
y = int(bbox.ymin *
|
112 |
-
box_w = int(bbox.width *
|
113 |
-
box_h = int(bbox.height *
|
|
|
|
|
114 |
boxes.append((x, y, x + box_w, y + box_h))
|
115 |
text = f"Detected {len(boxes)} face(s)"
|
116 |
else:
|
@@ -118,62 +157,69 @@ def compute_faces_overlay(image):
|
|
118 |
return boxes, text
|
119 |
|
120 |
# -----------------------------
|
121 |
-
# Main Analysis Functions
|
122 |
-
# They update the cache every SKIP_RATE frames and always return a current frame with overlay.
|
123 |
# -----------------------------
|
124 |
def analyze_posture_current(image):
|
125 |
global posture_cache
|
126 |
posture_cache["counter"] += 1
|
127 |
-
current_frame = np.array(image)
|
128 |
-
|
129 |
if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
|
130 |
landmarks, text = compute_posture_overlay(image)
|
131 |
posture_cache["landmarks"] = landmarks
|
132 |
posture_cache["text"] = text
|
133 |
-
|
134 |
output = current_frame.copy()
|
135 |
if posture_cache["landmarks"]:
|
136 |
output = draw_posture_overlay(output, posture_cache["landmarks"])
|
|
|
137 |
return output, f"Posture Analysis: {posture_cache['text']}"
|
138 |
|
139 |
def analyze_emotion_current(image):
|
140 |
global emotion_cache
|
141 |
emotion_cache["counter"] += 1
|
142 |
current_frame = np.array(image)
|
|
|
143 |
if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
|
144 |
text = compute_emotion_overlay(image)
|
145 |
emotion_cache["text"] = text
|
146 |
-
|
147 |
return current_frame, f"Emotion Analysis: {emotion_cache['text']}"
|
148 |
|
149 |
def analyze_objects_current(image):
|
150 |
global objects_cache
|
151 |
objects_cache["counter"] += 1
|
152 |
current_frame = np.array(image)
|
|
|
153 |
if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
|
154 |
boxes, text = compute_objects_overlay(image)
|
155 |
objects_cache["boxes"] = boxes
|
156 |
objects_cache["text"] = text
|
|
|
157 |
output = current_frame.copy()
|
158 |
if objects_cache["boxes"]:
|
159 |
output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
|
|
|
160 |
return output, f"Object Detection: {objects_cache['text']}"
|
161 |
|
162 |
def analyze_faces_current(image):
|
163 |
global faces_cache
|
164 |
faces_cache["counter"] += 1
|
165 |
current_frame = np.array(image)
|
|
|
166 |
if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
|
167 |
boxes, text = compute_faces_overlay(image)
|
168 |
faces_cache["boxes"] = boxes
|
169 |
faces_cache["text"] = text
|
|
|
170 |
output = current_frame.copy()
|
171 |
if faces_cache["boxes"]:
|
172 |
output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
|
|
|
173 |
return output, f"Face Detection: {faces_cache['text']}"
|
174 |
|
175 |
# -----------------------------
|
176 |
-
# Custom CSS
|
177 |
# -----------------------------
|
178 |
custom_css = """
|
179 |
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
|
@@ -206,7 +252,7 @@ body {
|
|
206 |
"""
|
207 |
|
208 |
# -----------------------------
|
209 |
-
# Create Individual Interfaces
|
210 |
# -----------------------------
|
211 |
posture_interface = gr.Interface(
|
212 |
fn=analyze_posture_current,
|
@@ -214,7 +260,7 @@ posture_interface = gr.Interface(
|
|
214 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
|
215 |
title="Posture Analysis",
|
216 |
description="Detects your posture using MediaPipe.",
|
217 |
-
live=True
|
218 |
)
|
219 |
|
220 |
emotion_interface = gr.Interface(
|
@@ -223,7 +269,7 @@ emotion_interface = gr.Interface(
|
|
223 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
|
224 |
title="Emotion Analysis",
|
225 |
description="Detects facial emotions using FER.",
|
226 |
-
live=
|
227 |
)
|
228 |
|
229 |
objects_interface = gr.Interface(
|
@@ -232,7 +278,7 @@ objects_interface = gr.Interface(
|
|
232 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
|
233 |
title="Object Detection",
|
234 |
description="Detects objects using a pretrained Faster R-CNN.",
|
235 |
-
live=
|
236 |
)
|
237 |
|
238 |
faces_interface = gr.Interface(
|
@@ -241,11 +287,11 @@ faces_interface = gr.Interface(
|
|
241 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
|
242 |
title="Face Detection",
|
243 |
description="Detects faces using MediaPipe.",
|
244 |
-
live=
|
245 |
)
|
246 |
|
247 |
# -----------------------------
|
248 |
-
# Create a Tabbed Interface
|
249 |
# -----------------------------
|
250 |
tabbed_interface = gr.TabbedInterface(
|
251 |
interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface],
|
@@ -253,12 +299,15 @@ tabbed_interface = gr.TabbedInterface(
|
|
253 |
)
|
254 |
|
255 |
# -----------------------------
|
256 |
-
# Wrap
|
257 |
# -----------------------------
|
258 |
demo = gr.Blocks(css=custom_css)
|
259 |
with demo:
|
260 |
gr.Markdown("<h1 class='gradio-title'>Real-Time Multi-Analysis App</h1>")
|
261 |
-
gr.Markdown(
|
|
|
|
|
|
|
262 |
tabbed_interface.render()
|
263 |
|
264 |
if __name__ == "__main__":
|
|
|
9 |
from fer import FER # Facial emotion recognition
|
10 |
|
11 |
# -----------------------------
|
12 |
+
# Configuration
|
13 |
# -----------------------------
|
14 |
+
# Heavy detectors are re-run only when a per-analysis frame counter is a
# multiple of SKIP_RATE; frames in between reuse the cached overlay data.
SKIP_RATE = 15

# Torch device for the object-detection model: CUDA when present, else CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Target size frames are resized to before inference. It is passed straight
# to cv2.resize, which interprets the tuple as (width, height).
DESIRED_SIZE = (640, 480)
|
22 |
|
23 |
# -----------------------------
|
24 |
# Global caches for overlay info and frame counters
|
|
|
41 |
# Pretrained Faster R-CNN (ResNet-50 FPN backbone) used for object detection.
_detector_weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
object_detection_model = models.detection.fasterrcnn_resnet50_fpn(weights=_detector_weights)
# Switch to inference mode, then move the parameters to the selected device.
object_detection_model.eval()
object_detection_model.to(device)

# PIL image -> float tensor conversion applied before object detection.
obj_transform = transforms.Compose([transforms.ToTensor()])

# NOTE(review): FER is constructed without an explicit device; whether it
# uses the GPU depends on the installed FER version - confirm if it matters.
emotion_detector = FER(mtcnn=True)
|
51 |
|
52 |
# -----------------------------
|
53 |
+
# Overlay Drawing Functions
|
54 |
# -----------------------------
|
55 |
def draw_posture_overlay(raw_frame, landmarks):
    """Mark every pose landmark on ``raw_frame`` with a small filled dot.

    Args:
        raw_frame: Image array to draw on; it is modified in place.
        landmarks: Iterable of ``(x, y)`` pixel coordinates.

    Returns:
        The same ``raw_frame`` object, after drawing.
    """
    dot_radius = 4
    dot_color = (0, 255, 0)
    for point in landmarks:
        px, py = point
        # Thickness -1 asks OpenCV for a filled circle.
        cv2.circle(raw_frame, (px, py), dot_radius, dot_color, -1)
    return raw_frame
|
|
|
64 |
|
65 |
# -----------------------------
|
66 |
# Heavy (Synchronous) Detection Functions
|
|
|
67 |
# -----------------------------
|
68 |
def compute_posture_overlay(image):
    """Run pose estimation on a downscaled copy of ``image``.

    Args:
        image: RGB frame (anything ``np.array`` can convert to an H x W x C
            array).

    Returns:
        A ``(landmarks, text)`` tuple. ``landmarks`` is a list of ``(x, y)``
        integer pixel coordinates in the ORIGINAL frame's coordinate space
        (empty when no pose was found); ``text`` is a status message.
    """
    # The RGB -> BGR -> RGB round trip is kept from the original pipeline.
    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    h, w, _ = frame_bgr.shape

    # Downscale before inference to cut processing time.
    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
    pose_results = pose.process(frame_rgb_small)

    if not pose_results.pose_landmarks:
        return [], "No posture detected"

    # Landmark coordinates are normalized fractions of the frame, so they map
    # to original pixels by multiplying with the original width/height. The
    # previous `small_w * (w / small_w)` factor cancelled to exactly this.
    landmarks = [
        (int(lm.x * w), int(lm.y * h))
        for lm in pose_results.pose_landmarks.landmark
    ]
    return landmarks, "Posture detected"
|
94 |
|
95 |
def compute_emotion_overlay(image):
|
96 |
+
# Convert to BGR
|
97 |
+
frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
|
98 |
+
# 2) Downscale
|
99 |
+
frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
|
100 |
+
frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
|
101 |
+
|
102 |
+
emotions = emotion_detector.detect_emotions(frame_rgb_small)
|
103 |
if emotions:
|
104 |
top_emotion, score = max(emotions[0]["emotions"].items(), key=lambda x: x[1])
|
105 |
text = f"{top_emotion} ({score:.2f})"
|
|
|
108 |
return text
|
109 |
|
110 |
def compute_objects_overlay(image):
    """Detect objects on a downscaled copy of ``image`` with Faster R-CNN.

    Args:
        image: RGB frame (anything ``np.array`` can convert to an H x W x C
            array).

    Returns:
        A ``(boxes, text)`` tuple. ``boxes`` is a list of
        ``(x1, y1, x2, y2)`` integer tuples expressed in the ORIGINAL frame's
        pixel coordinates, one per detection scoring above 0.8; ``text`` is a
        status message.
    """
    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    orig_h, orig_w = frame_bgr.shape[:2]

    # Downscale before inference to cut processing time.
    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
    small_h, small_w = frame_bgr_small.shape[:2]
    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)

    image_pil = Image.fromarray(frame_rgb_small)
    img_tensor = obj_transform(image_pil).to(device)

    with torch.no_grad():
        detections = object_detection_model([img_tensor])[0]

    # The model reports boxes in the downscaled image's pixel space, while the
    # caller draws them on the full-resolution frame; map them back so the
    # rectangles land on the right place.
    scale_x = orig_w / small_w
    scale_y = orig_h / small_h

    threshold = 0.8
    boxes = []
    for box, score in zip(detections["boxes"], detections["scores"]):
        if score > threshold:
            x1, y1, x2, y2 = box.tolist()
            boxes.append(
                (
                    int(x1 * scale_x),
                    int(y1 * scale_y),
                    int(x2 * scale_x),
                    int(y2 * scale_y),
                )
            )

    text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
    return boxes, text
|
132 |
|
133 |
def compute_faces_overlay(image):
|
134 |
+
frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
|
135 |
+
h, w, _ = frame_bgr.shape
|
136 |
+
# 2) Downscale
|
137 |
+
frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
|
138 |
+
small_h, small_w, _ = frame_bgr_small.shape
|
139 |
+
|
140 |
+
frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
|
141 |
+
face_results = face_detection.process(frame_rgb_small)
|
142 |
+
|
143 |
boxes = []
|
144 |
if face_results.detections:
|
145 |
for detection in face_results.detections:
|
146 |
bbox = detection.location_data.relative_bounding_box
|
147 |
+
x = int(bbox.xmin * small_w)
|
148 |
+
y = int(bbox.ymin * small_h)
|
149 |
+
box_w = int(bbox.width * small_w)
|
150 |
+
box_h = int(bbox.height * small_h)
|
151 |
+
# Scale bounding box coords back to original if you need full resolution
|
152 |
+
# E.g., x_original = int(x * (w / small_w)), etc.
|
153 |
boxes.append((x, y, x + box_w, y + box_h))
|
154 |
text = f"Detected {len(boxes)} face(s)"
|
155 |
else:
|
|
|
157 |
return boxes, text
|
158 |
|
159 |
# -----------------------------
|
160 |
+
# Main Analysis Functions
|
|
|
161 |
# -----------------------------
|
162 |
def analyze_posture_current(image):
    """Return the current frame annotated with cached pose landmarks.

    Pose estimation is re-run only when the frame counter is a multiple of
    ``SKIP_RATE`` or when nothing has been cached yet; other frames reuse the
    cached landmarks and text.

    Args:
        image: Incoming frame, or ``None`` when no frame is available.

    Returns:
        ``(annotated_frame, status_text)``. When ``image`` is ``None`` the
        frame slot is ``None`` and the cache is left untouched.
    """
    # Without a frame there is nothing to convert or draw on; bail out before
    # np.array(None) reaches OpenCV and raises.
    if image is None:
        return None, "Posture Analysis: No frame received"

    posture_cache["counter"] += 1
    current_frame = np.array(image)

    if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
        landmarks, text = compute_posture_overlay(image)
        posture_cache["landmarks"] = landmarks
        posture_cache["text"] = text

    # Draw on a copy so the converted input frame itself stays unmodified.
    output = current_frame.copy()
    if posture_cache["landmarks"]:
        output = draw_posture_overlay(output, posture_cache["landmarks"])

    return output, f"Posture Analysis: {posture_cache['text']}"
|
177 |
|
178 |
def analyze_emotion_current(image):
    """Return the current frame together with the cached emotion label.

    Emotion recognition is re-run only when the frame counter is a multiple
    of ``SKIP_RATE`` or when no text has been cached yet.

    Args:
        image: Incoming frame, or ``None`` when no frame is available.

    Returns:
        ``(frame, status_text)``; the frame is returned without any overlay.
        When ``image`` is ``None`` the frame slot is ``None`` and the cache is
        left untouched.
    """
    # Without a frame there is nothing to analyze; bail out before
    # np.array(None) reaches OpenCV and raises.
    if image is None:
        return None, "Emotion Analysis: No frame received"

    emotion_cache["counter"] += 1
    current_frame = np.array(image)

    if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
        emotion_cache["text"] = compute_emotion_overlay(image)

    return current_frame, f"Emotion Analysis: {emotion_cache['text']}"
|
188 |
|
189 |
def analyze_objects_current(image):
    """Return the current frame annotated with cached object boxes.

    Object detection is re-run only when the frame counter is a multiple of
    ``SKIP_RATE`` or when no boxes have been cached yet.

    Args:
        image: Incoming frame, or ``None`` when no frame is available.

    Returns:
        ``(annotated_frame, status_text)``. When ``image`` is ``None`` the
        frame slot is ``None`` and the cache is left untouched.
    """
    # Without a frame there is nothing to convert or draw on; bail out before
    # np.array(None) reaches OpenCV and raises.
    if image is None:
        return None, "Object Detection: No frame received"

    objects_cache["counter"] += 1
    current_frame = np.array(image)

    if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
        boxes, text = compute_objects_overlay(image)
        objects_cache["boxes"] = boxes
        objects_cache["text"] = text

    # Draw on a copy so the converted input frame itself stays unmodified.
    output = current_frame.copy()
    if objects_cache["boxes"]:
        output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))

    return output, f"Object Detection: {objects_cache['text']}"
|
204 |
|
205 |
def analyze_faces_current(image):
    """Return the current frame annotated with cached face boxes.

    Face detection is re-run only when the frame counter is a multiple of
    ``SKIP_RATE`` or when no boxes have been cached yet.

    Args:
        image: Incoming frame, or ``None`` when no frame is available.

    Returns:
        ``(annotated_frame, status_text)``. When ``image`` is ``None`` the
        frame slot is ``None`` and the cache is left untouched.
    """
    # Without a frame there is nothing to convert or draw on; bail out before
    # np.array(None) reaches OpenCV and raises.
    if image is None:
        return None, "Face Detection: No frame received"

    faces_cache["counter"] += 1
    current_frame = np.array(image)

    if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
        boxes, text = compute_faces_overlay(image)
        faces_cache["boxes"] = boxes
        faces_cache["text"] = text

    # Draw on a copy so the converted input frame itself stays unmodified.
    output = current_frame.copy()
    if faces_cache["boxes"]:
        output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))

    return output, f"Face Detection: {faces_cache['text']}"
|
220 |
|
221 |
# -----------------------------
|
222 |
+
# Custom CSS
|
223 |
# -----------------------------
|
224 |
custom_css = """
|
225 |
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
|
|
|
252 |
"""
|
253 |
|
254 |
# -----------------------------
|
255 |
+
# Create Individual Interfaces
|
256 |
# -----------------------------
|
257 |
posture_interface = gr.Interface(
|
258 |
fn=analyze_posture_current,
|
|
|
260 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
|
261 |
title="Posture Analysis",
|
262 |
description="Detects your posture using MediaPipe.",
|
263 |
+
live=True # Keep only this interface live to avoid multiple heavy computations
|
264 |
)
|
265 |
|
266 |
emotion_interface = gr.Interface(
|
|
|
269 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
|
270 |
title="Emotion Analysis",
|
271 |
description="Detects facial emotions using FER.",
|
272 |
+
live=False # Turn off streaming to reduce overhead
|
273 |
)
|
274 |
|
275 |
objects_interface = gr.Interface(
|
|
|
278 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
|
279 |
title="Object Detection",
|
280 |
description="Detects objects using a pretrained Faster R-CNN.",
|
281 |
+
live=False
|
282 |
)
|
283 |
|
284 |
faces_interface = gr.Interface(
|
|
|
287 |
outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
|
288 |
title="Face Detection",
|
289 |
description="Detects faces using MediaPipe.",
|
290 |
+
live=False
|
291 |
)
|
292 |
|
293 |
# -----------------------------
|
294 |
+
# Create a Tabbed Interface
|
295 |
# -----------------------------
|
296 |
tabbed_interface = gr.TabbedInterface(
|
297 |
interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface],
|
|
|
299 |
)
|
300 |
|
301 |
# -----------------------------
|
302 |
+
# Wrap in a Blocks Layout
|
303 |
# -----------------------------
|
304 |
# Top-level page: a title, a short description, then the tabbed analyses.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("<h1 class='gradio-title'>Real-Time Multi-Analysis App</h1>")
    intro_html = (
        "<p class='gradio-description'>Experience a high-tech cinematic interface for real-time "
        "analysis of your posture, emotions, objects, and faces using your webcam.</p>"
    )
    gr.Markdown(intro_html)
    # Embed the previously built tabbed interface inside this layout.
    tabbed_interface.render()
|
312 |
|
313 |
if __name__ == "__main__":
|