David Driscoll committed
Commit: f6a647b
Parent(s): 4a53aae
Video to image, text change
app.py
CHANGED
@@ -11,13 +11,13 @@ from fer import FER  # Facial emotion recognition
 # -----------------------------
 # Configuration
 # -----------------------------
-#
-SKIP_RATE =
+# For image processing, always run the analysis (no frame skipping)
+SKIP_RATE = 1
 
-#
+# Use GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-#
+# Desired input size for faster inference
 DESIRED_SIZE = (640, 480)
 
 # -----------------------------
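Two details in this configuration are worth spelling out: with SKIP_RATE = 1 the `counter % SKIP_RATE` checks in the analysis functions below always pass, so every submitted image is fully re-analysed; and DESIRED_SIZE is fed to cv2.resize, which expects (width, height), so the downscaled frames come out with shape (480, 640, 3). A minimal sketch of the resize convention (the input array here is a stand-in, not part of app.py):

```python
import cv2
import numpy as np

DESIRED_SIZE = (640, 480)  # (width, height), the order cv2.resize expects

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # stand-in for a full-resolution frame
small = cv2.resize(frame, DESIRED_SIZE)
print(small.shape)  # (480, 640, 3): NumPy shape is (height, width, channels)
```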
@@ -45,16 +45,16 @@ object_detection_model.eval().to(device)  # Move model to GPU (if available)
 
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
-#
-# Some versions allow device specification, e.g. FER(mtcnn=True, device=device).
+# Initialize the FER emotion detector
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
 # Overlay Drawing Functions
 # -----------------------------
 def draw_posture_overlay(raw_frame, landmarks):
+    # Draw circles for each landmark using lime green (BGR: (50,205,50))
     for (x, y) in landmarks:
-        cv2.circle(raw_frame, (x, y), 4, (
+        cv2.circle(raw_frame, (x, y), 4, (50, 205, 50), -1)
     return raw_frame
 
 def draw_boxes_overlay(raw_frame, boxes, color):
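The body of draw_boxes_overlay is outside this hunk; given how it is called below (a list of (x1, y1, x2, y2) tuples plus a BGR color), it presumably wraps cv2.rectangle roughly as in this sketch, which is an assumption rather than the actual implementation:

```python
import cv2

def draw_boxes_overlay(raw_frame, boxes, color):
    # Draw one rectangle per (x1, y1, x2, y2) box in the given BGR color.
    for (x1, y1, x2, y2) in boxes:
        cv2.rectangle(raw_frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
    return raw_frame
```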
@@ -66,22 +66,18 @@ def draw_boxes_overlay(raw_frame, boxes, color):
 # Heavy (Synchronous) Detection Functions
 # -----------------------------
 def compute_posture_overlay(image):
-    # Convert to BGR for MediaPipe
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
-
-    # 2) Downscale before processing (optional for posture)
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
     pose_results = pose.process(frame_rgb_small)
 
-    # Scale landmarks back up to original size if needed
     if pose_results.pose_landmarks:
         landmarks = []
         for lm in pose_results.pose_landmarks.landmark:
-            #
+            # Scale landmarks back to the original image size
             x = int(lm.x * small_w * (w / small_w))
             y = int(lm.y * small_h * (h / small_h))
             landmarks.append((x, y))
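The scaling expressions above multiply a normalized MediaPipe coordinate by `small_w` and then by `w / small_w`, which reduces algebraically to `lm.x * w` (and likewise `lm.y * h`); the detour through the downscaled size cancels out. A quick numeric check with illustrative values:

```python
# int(lm_x * small_w * (w / small_w)) == int(lm_x * w) whenever small_w != 0
w, small_w = 1920, 640
lm_x = 0.37  # example normalized landmark coordinate from MediaPipe (0..1)
print(int(lm_x * small_w * (w / small_w)), int(lm_x * w))  # 710 710
```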
@@ -93,9 +89,7 @@ def compute_posture_overlay(image):
     return landmarks, text
 
 def compute_emotion_overlay(image):
-    # Convert to BGR
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
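The part of compute_emotion_overlay that actually queries the detector is not shown in this diff. For orientation, a hedged sketch of how the `fer` package's FER detector is commonly called on an RGB frame; the exact call in app.py may differ:

```python
from fer import FER
import numpy as np

emotion_detector = FER(mtcnn=True)

frame_rgb_small = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in frame
emotion, score = emotion_detector.top_emotion(frame_rgb_small)  # may be (None, None) if no face is found
print(emotion, score)
```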
@@ -109,7 +103,6 @@ def compute_emotion_overlay(image):
 
 def compute_objects_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
@@ -123,17 +116,13 @@ def compute_objects_overlay(image):
     boxes = []
     for box, score in zip(detections["boxes"], detections["scores"]):
         if score > threshold:
-            # box is in the scaled-down coordinates;
-            # you may want to scale them back to the original if needed
             boxes.append(tuple(box.int().cpu().numpy()))
-
     text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
     return boxes, text
 
 def compute_faces_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
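The `detections` dict filtered above is the per-image output of a torchvision detection model (Faster R-CNN here), which provides 'boxes', 'labels' and 'scores' tensors. A self-contained sketch of the same filtering step with made-up detections and an illustrative 0.8 threshold (the real threshold is defined outside this hunk):

```python
import torch

# Stand-in for one image's output from a torchvision Faster R-CNN
detections = {
    "boxes": torch.tensor([[10.0, 20.0, 110.0, 220.0], [5.0, 5.0, 50.0, 50.0]]),
    "labels": torch.tensor([1, 62]),
    "scores": torch.tensor([0.95, 0.40]),
}

threshold = 0.8  # illustrative confidence cutoff
boxes = [
    tuple(box.int().cpu().numpy())
    for box, score in zip(detections["boxes"], detections["scores"])
    if score > threshold
]
print(len(boxes))  # 1: only the first detection clears the threshold
```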
@@ -148,8 +137,6 @@ def compute_faces_overlay(image):
             y = int(bbox.ymin * small_h)
             box_w = int(bbox.width * small_w)
             box_h = int(bbox.height * small_h)
-            # Scale bounding box coords back to original if you need full resolution
-            # E.g., x_original = int(x * (w / small_w)), etc.
             boxes.append((x, y, x + box_w, y + box_h))
         text = f"Detected {len(boxes)} face(s)"
     else:
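MediaPipe face detection reports relative_bounding_box fields normalized to [0, 1], which the lines above convert to pixel coordinates in the downscaled frame. A small worked example with illustrative values:

```python
# Converting a normalized MediaPipe bounding box to pixel coordinates
small_w, small_h = 640, 480                       # downscaled frame size (DESIRED_SIZE)
xmin, ymin, width, height = 0.25, 0.25, 0.5, 0.5  # example relative bbox values
x, y = int(xmin * small_w), int(ymin * small_h)
box_w, box_h = int(width * small_w), int(height * small_h)
print((x, y, x + box_w, y + box_h))  # (160, 120, 480, 360)
```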
@@ -157,13 +144,12 @@ def compute_faces_overlay(image):
     return boxes, text
 
 # -----------------------------
-# Main Analysis Functions
+# Main Analysis Functions for Single Image
 # -----------------------------
 def analyze_posture_current(image):
     global posture_cache
     posture_cache["counter"] += 1
     current_frame = np.array(image)
-
     if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
         landmarks, text = compute_posture_overlay(image)
         posture_cache["landmarks"] = landmarks
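analyze_posture_current and the sibling functions below lean on module-level cache dicts whose initialization is outside this diff. Judging from the keys they read and write, the caches presumably look something like this sketch (names match app.py, initial values are assumptions):

```python
# Assumed initial shape of the module-level caches; not shown in this diff.
posture_cache = {"counter": 0, "landmarks": None, "text": None}
emotion_cache = {"counter": 0, "text": None}
objects_cache = {"counter": 0, "boxes": None, "text": None}
faces_cache = {"counter": 0, "boxes": None, "text": None}
```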
@@ -173,24 +159,22 @@ def analyze_posture_current(image):
     if posture_cache["landmarks"]:
         output = draw_posture_overlay(output, posture_cache["landmarks"])
 
-    return output, f"Posture Analysis: {posture_cache['text']}"
+    return output, f"<div style='color: lime;'>Posture Analysis: {posture_cache['text']}</div>"
 
 def analyze_emotion_current(image):
     global emotion_cache
     emotion_cache["counter"] += 1
     current_frame = np.array(image)
-
     if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
         text = compute_emotion_overlay(image)
         emotion_cache["text"] = text
 
-    return current_frame, f"Emotion Analysis: {emotion_cache['text']}"
+    return current_frame, f"<div style='color: lime;'>Emotion Analysis: {emotion_cache['text']}</div>"
 
 def analyze_objects_current(image):
     global objects_cache
     objects_cache["counter"] += 1
     current_frame = np.array(image)
-
     if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
         boxes, text = compute_objects_overlay(image)
         objects_cache["boxes"] = boxes
@@ -199,14 +183,12 @@ def analyze_objects_current(image):
     output = current_frame.copy()
     if objects_cache["boxes"]:
         output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
-
-    return output, f"Object Detection: {objects_cache['text']}"
+    return output, f"<div style='color: lime;'>Object Detection: {objects_cache['text']}</div>"
 
 def analyze_faces_current(image):
     global faces_cache
     faces_cache["counter"] += 1
     current_frame = np.array(image)
-
     if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
         boxes, text = compute_faces_overlay(image)
         faces_cache["boxes"] = boxes
@@ -215,8 +197,38 @@ def analyze_faces_current(image):
     output = current_frame.copy()
     if faces_cache["boxes"]:
         output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
-
-
+    return output, f"<div style='color: lime;'>Face Detection: {faces_cache['text']}</div>"
+
+def analyze_all(image):
+    # Run all analyses on the same image
+    current_frame = np.array(image).copy()
+
+    # Posture Analysis
+    landmarks, posture_text = compute_posture_overlay(image)
+    if landmarks:
+        current_frame = draw_posture_overlay(current_frame, landmarks)
+
+    # Emotion Analysis
+    emotion_text = compute_emotion_overlay(image)
+
+    # Object Detection
+    boxes_obj, objects_text = compute_objects_overlay(image)
+    if boxes_obj:
+        current_frame = draw_boxes_overlay(current_frame, boxes_obj, (255, 255, 0))
+
+    # Face Detection
+    boxes_face, faces_text = compute_faces_overlay(image)
+    if boxes_face:
+        current_frame = draw_boxes_overlay(current_frame, boxes_face, (0, 0, 255))
+
+    combined_text = (
+        f"Posture Analysis: {posture_text}<br>"
+        f"Emotion Analysis: {emotion_text}<br>"
+        f"Object Detection: {objects_text}<br>"
+        f"Face Detection: {faces_text}"
+    )
+    combined_text_html = f"<div style='color: lime;'>{combined_text}</div>"
+    return current_frame, combined_text_html
 
 # -----------------------------
 # Custom CSS
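analyze_all chains the four compute_* helpers over a single image and returns the annotated frame together with a combined HTML summary. A hedged usage sketch, assuming it is run inside app.py (or with its functions imported) and that a local test image exists at the hypothetical path shown:

```python
from PIL import Image

image = Image.open("test.jpg")  # hypothetical local image path

annotated_frame, summary_html = analyze_all(image)
print(annotated_frame.shape)  # annotated copy of the input as a NumPy array (H, W, 3)
print(summary_html)           # "<div style='color: lime;'>Posture Analysis: ...</div>"
```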
@@ -252,30 +264,30 @@ body {
 """
 
 # -----------------------------
-# Create Individual Interfaces
+# Create Individual Interfaces for Image Processing
 # -----------------------------
 posture_interface = gr.Interface(
     fn=analyze_posture_current,
-    inputs=gr.Image(
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.
+    inputs=gr.Image(label="Upload an Image for Posture Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Posture Analysis")],
     title="Posture Analysis",
     description="Detects your posture using MediaPipe.",
-    live=
+    live=False
 )
 
 emotion_interface = gr.Interface(
     fn=analyze_emotion_current,
-    inputs=gr.Image(
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.
+    inputs=gr.Image(label="Upload an Image for Emotion Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Emotion Analysis")],
     title="Emotion Analysis",
     description="Detects facial emotions using FER.",
-    live=False
+    live=False
 )
 
 objects_interface = gr.Interface(
     fn=analyze_objects_current,
-    inputs=gr.Image(
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.
+    inputs=gr.Image(label="Upload an Image for Object Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Object Detection")],
     title="Object Detection",
     description="Detects objects using a pretrained Faster R-CNN.",
     live=False
@@ -283,19 +295,28 @@ objects_interface = gr.Interface(
 
 faces_interface = gr.Interface(
     fn=analyze_faces_current,
-    inputs=gr.Image(
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.
+    inputs=gr.Image(label="Upload an Image for Face Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Face Detection")],
     title="Face Detection",
     description="Detects faces using MediaPipe.",
     live=False
 )
 
+all_interface = gr.Interface(
+    fn=analyze_all,
+    inputs=gr.Image(label="Upload an Image for All Inferences"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Combined Analysis")],
+    title="All Inferences",
+    description="Runs posture, emotion, object, and face detection all at once.",
+    live=False
+)
+
 # -----------------------------
 # Create a Tabbed Interface
 # -----------------------------
 tabbed_interface = gr.TabbedInterface(
-    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface],
-    tab_names=["Posture", "Emotion", "Objects", "Faces"]
+    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface, all_interface],
+    tab_names=["Posture", "Emotion", "Objects", "Faces", "All Inferences"]
 )
 
 # -----------------------------
@@ -303,10 +324,9 @@ tabbed_interface = gr.TabbedInterface(
 # -----------------------------
 demo = gr.Blocks(css=custom_css)
 with demo:
-    gr.Markdown("<h1 class='gradio-title'>
+    gr.Markdown("<h1 class='gradio-title'>Multi-Analysis Image App</h1>")
     gr.Markdown(
-        "<p class='gradio-description'>
-        "analysis of your posture, emotions, objects, and faces using your webcam.</p>"
+        "<p class='gradio-description'>Upload an image to run analysis for posture, emotions, objects, and faces.</p>"
     )
     tabbed_interface.render()
 
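The Blocks app assembled in this last hunk is presumably started further down in app.py, beyond this diff, with the usual Gradio entry point, roughly:

```python
demo.launch()
```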