David Driscoll committed on
Commit 4a53aae · 1 Parent(s): 74d3d77

Overhaul lag reduction

Files changed (1)
  1. app.py +90 -41
app.py CHANGED
@@ -9,9 +9,16 @@ import mediapipe as mp
 from fer import FER  # Facial emotion recognition
 
 # -----------------------------
-# Configuration: Adjust skip rate (lower = more frequent heavy updates)
+# Configuration
 # -----------------------------
-SKIP_RATE = 5
+# 1) Increase skip rate
+SKIP_RATE = 15
+
+# 2) Use GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# 3) Desired input size for faster inference
+DESIRED_SIZE = (640, 480)
 
 # -----------------------------
 # Global caches for overlay info and frame counters
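One side note on DESIRED_SIZE: cv2.resize to a fixed (640, 480) ignores the source aspect ratio. If distortion matters, a minimal sketch of downscaling by width instead (the helper name is illustrative, not part of this commit):

```python
import cv2
import numpy as np

def downscale_keep_aspect(frame_bgr: np.ndarray, max_width: int = 640) -> np.ndarray:
    """Resize so the width is at most max_width while preserving aspect ratio."""
    h, w = frame_bgr.shape[:2]
    if w <= max_width:
        return frame_bgr
    scale = max_width / w
    return cv2.resize(frame_bgr, (max_width, int(h * scale)))
```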
@@ -34,16 +41,18 @@ face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
 object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
     weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
 )
-object_detection_model.eval()
+object_detection_model.eval().to(device)  # Move model to GPU (if available)
+
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
+# If the FER library supports GPU, it may pick it up automatically.
+# Some versions allow device specification, e.g. FER(mtcnn=True, device=device).
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
-# Fast Overlay Functions
+# Overlay Drawing Functions
 # -----------------------------
 def draw_posture_overlay(raw_frame, landmarks):
-    # Draw each landmark as a small circle
     for (x, y) in landmarks:
         cv2.circle(raw_frame, (x, y), 4, (0, 255, 0), -1)
     return raw_frame
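Once the detection model lives on `device`, its input tensors must be moved there too, or Faster R-CNN raises a RuntimeError; the commit does this later in compute_objects_overlay. A minimal sketch of a defensive variant (the names mirror the diff, but this snippet is not part of the commit):

```python
import torch

# Query the device the detection model actually ended up on and move the
# input there, rather than trusting a separate global.
model_device = next(object_detection_model.parameters()).device
img_tensor = obj_transform(image_pil).to(model_device)

with torch.no_grad():
    detections = object_detection_model([img_tensor])[0]
```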
@@ -55,27 +64,42 @@ def draw_boxes_overlay(raw_frame, boxes, color):
 
 # -----------------------------
 # Heavy (Synchronous) Detection Functions
-# These functions compute the overlay info on the current frame.
 # -----------------------------
 def compute_posture_overlay(image):
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    h, w, _ = frame.shape
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    pose_results = pose.process(frame_rgb)
+    # Convert to BGR for MediaPipe
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    h, w, _ = frame_bgr.shape
+
+    # 2) Downscale before processing (optional for posture)
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    small_h, small_w, _ = frame_bgr_small.shape
+
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+    pose_results = pose.process(frame_rgb_small)
+
+    # Scale landmarks back up to original size if needed
     if pose_results.pose_landmarks:
         landmarks = []
         for lm in pose_results.pose_landmarks.landmark:
-            landmarks.append((int(lm.x * w), int(lm.y * h)))
+            # Rescale from the smaller frame to the original size
+            x = int(lm.x * small_w * (w / small_w))
+            y = int(lm.y * small_h * (h / small_h))
+            landmarks.append((x, y))
         text = "Posture detected"
     else:
         landmarks = []
         text = "No posture detected"
+
     return landmarks, text
 
 def compute_emotion_overlay(image):
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    emotions = emotion_detector.detect_emotions(frame_rgb)
+    # Convert to BGR
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    # 2) Downscale
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+
+    emotions = emotion_detector.detect_emotions(frame_rgb_small)
    if emotions:
        top_emotion, score = max(emotions[0]["emotions"].items(), key=lambda x: x[1])
        text = f"{top_emotion} ({score:.2f})"
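MediaPipe pose landmarks are normalized to [0, 1], so the rescaling added to compute_posture_overlay above (small_w * (w / small_w)) algebraically reduces to multiplying by the original dimensions. A minimal equivalent, with rescale_landmark as a hypothetical helper rather than code from this commit:

```python
def rescale_landmark(lm, orig_w: int, orig_h: int) -> tuple[int, int]:
    # lm.x and lm.y are normalized, so int(lm.x * small_w * (orig_w / small_w))
    # is the same pixel as int(lm.x * orig_w); the small-frame terms cancel.
    return int(lm.x * orig_w), int(lm.y * orig_h)
```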
@@ -84,33 +108,48 @@ def compute_emotion_overlay(image):
     return text
 
 def compute_objects_overlay(image):
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    image_pil = Image.fromarray(frame_rgb)
-    img_tensor = obj_transform(image_pil)
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    # 2) Downscale
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+
+    image_pil = Image.fromarray(frame_rgb_small)
+    img_tensor = obj_transform(image_pil).to(device)
+
     with torch.no_grad():
         detections = object_detection_model([img_tensor])[0]
+
     threshold = 0.8
     boxes = []
     for box, score in zip(detections["boxes"], detections["scores"]):
         if score > threshold:
+            # box is in the scaled-down coordinates;
+            # you may want to scale them back to the original if needed
             boxes.append(tuple(box.int().cpu().numpy()))
+
     text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
     return boxes, text
 
 def compute_faces_overlay(image):
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    h, w, _ = frame.shape
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    face_results = face_detection.process(frame_rgb)
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    h, w, _ = frame_bgr.shape
+    # 2) Downscale
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    small_h, small_w, _ = frame_bgr_small.shape
+
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+    face_results = face_detection.process(frame_rgb_small)
+
     boxes = []
     if face_results.detections:
         for detection in face_results.detections:
             bbox = detection.location_data.relative_bounding_box
-            x = int(bbox.xmin * w)
-            y = int(bbox.ymin * h)
-            box_w = int(bbox.width * w)
-            box_h = int(bbox.height * h)
+            x = int(bbox.xmin * small_w)
+            y = int(bbox.ymin * small_h)
+            box_w = int(bbox.width * small_w)
+            box_h = int(bbox.height * small_h)
+            # Scale bounding box coords back to original if you need full resolution
+            # E.g., x_original = int(x * (w / small_w)), etc.
             boxes.append((x, y, x + box_w, y + box_h))
         text = f"Detected {len(boxes)} face(s)"
     else:
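The in-code comments above note that the object and face boxes are now in the downscaled coordinate system. A minimal sketch of mapping a box back to the original resolution, as those comments suggest (scale_box_to_original is a hypothetical helper, not part of the commit):

```python
def scale_box_to_original(box, small_w: int, small_h: int, orig_w: int, orig_h: int):
    """Map an (x1, y1, x2, y2) box from the DESIRED_SIZE frame back to the original frame."""
    sx, sy = orig_w / small_w, orig_h / small_h
    x1, y1, x2, y2 = box
    return int(x1 * sx), int(y1 * sy), int(x2 * sx), int(y2 * sy)
```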
@@ -118,62 +157,69 @@ def compute_faces_overlay(image):
     return boxes, text
 
 # -----------------------------
-# Main Analysis Functions (run every frame)
-# They update the cache every SKIP_RATE frames and always return a current frame with overlay.
+# Main Analysis Functions
 # -----------------------------
 def analyze_posture_current(image):
     global posture_cache
     posture_cache["counter"] += 1
-    current_frame = np.array(image)  # raw RGB frame (as numpy array)
-    # Update overlay info every SKIP_RATE frames
+    current_frame = np.array(image)
+
     if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
         landmarks, text = compute_posture_overlay(image)
         posture_cache["landmarks"] = landmarks
         posture_cache["text"] = text
-    # Draw cached landmarks on the current frame copy
+
     output = current_frame.copy()
     if posture_cache["landmarks"]:
         output = draw_posture_overlay(output, posture_cache["landmarks"])
+
     return output, f"Posture Analysis: {posture_cache['text']}"
 
 def analyze_emotion_current(image):
     global emotion_cache
     emotion_cache["counter"] += 1
     current_frame = np.array(image)
+
     if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
         text = compute_emotion_overlay(image)
         emotion_cache["text"] = text
-    # For emotion, we don't overlay anything; just return the current frame.
+
     return current_frame, f"Emotion Analysis: {emotion_cache['text']}"
 
 def analyze_objects_current(image):
     global objects_cache
     objects_cache["counter"] += 1
     current_frame = np.array(image)
+
     if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
         boxes, text = compute_objects_overlay(image)
         objects_cache["boxes"] = boxes
         objects_cache["text"] = text
+
     output = current_frame.copy()
     if objects_cache["boxes"]:
         output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
+
     return output, f"Object Detection: {objects_cache['text']}"
 
 def analyze_faces_current(image):
     global faces_cache
     faces_cache["counter"] += 1
     current_frame = np.array(image)
+
     if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
         boxes, text = compute_faces_overlay(image)
         faces_cache["boxes"] = boxes
         faces_cache["text"] = text
+
     output = current_frame.copy()
     if faces_cache["boxes"]:
         output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
+
     return output, f"Face Detection: {faces_cache['text']}"
 
 # -----------------------------
-# Custom CSS for a High-Tech Look (White Font)
+# Custom CSS
 # -----------------------------
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
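With SKIP_RATE = 15, each heavy detector runs on roughly one frame in fifteen (about twice per second at 30 FPS); every other frame only redraws the cached overlay. A minimal sketch of the refresh test these four functions repeat (refresh_due is a hypothetical helper, not in the commit):

```python
def refresh_due(counter: int, cached_value, skip_rate: int = 15) -> bool:
    # Heavy inference runs when the frame counter hits the skip rate
    # or when nothing has been cached yet.
    return counter % skip_rate == 0 or cached_value is None
```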
@@ -206,7 +252,7 @@ body {
 """
 
 # -----------------------------
-# Create Individual Interfaces for Each Analysis
+# Create Individual Interfaces
 # -----------------------------
 posture_interface = gr.Interface(
     fn=analyze_posture_current,
@@ -214,7 +260,7 @@ posture_interface = gr.Interface(
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
     title="Posture Analysis",
     description="Detects your posture using MediaPipe.",
-    live=True
+    live=True  # Keep only this interface live to avoid multiple heavy computations
 )
 
 emotion_interface = gr.Interface(
@@ -223,7 +269,7 @@ emotion_interface = gr.Interface(
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
     title="Emotion Analysis",
     description="Detects facial emotions using FER.",
-    live=True
+    live=False  # Turn off streaming to reduce overhead
 )
 
 objects_interface = gr.Interface(
@@ -232,7 +278,7 @@ objects_interface = gr.Interface(
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
     title="Object Detection",
     description="Detects objects using a pretrained Faster R-CNN.",
-    live=True
+    live=False
 )
 
 faces_interface = gr.Interface(
@@ -241,11 +287,11 @@ faces_interface = gr.Interface(
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
     title="Face Detection",
     description="Detects faces using MediaPipe.",
-    live=True
+    live=False
 )
 
 # -----------------------------
-# Create a Tabbed Interface for All Analyses
+# Create a Tabbed Interface
 # -----------------------------
 tabbed_interface = gr.TabbedInterface(
     interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface],
@@ -253,12 +299,15 @@ tabbed_interface = gr.TabbedInterface(
 )
 
 # -----------------------------
-# Wrap Everything in a Blocks Layout with Custom CSS
+# Wrap in a Blocks Layout
 # -----------------------------
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.Markdown("<h1 class='gradio-title'>Real-Time Multi-Analysis App</h1>")
-    gr.Markdown("<p class='gradio-description'>Experience a high-tech cinematic interface for real-time analysis of your posture, emotions, objects, and faces using your webcam.</p>")
+    gr.Markdown(
+        "<p class='gradio-description'>Experience a high-tech cinematic interface for real-time "
+        "analysis of your posture, emotions, objects, and faces using your webcam.</p>"
+    )
     tabbed_interface.render()
 
 if __name__ == "__main__":
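Beyond the live flags, Gradio's request queue is another common way to keep several heavy tabs from stalling each other; a hedged sketch of the launch block, assuming a recent Gradio version (not part of this commit):

```python
if __name__ == "__main__":
    demo.queue()   # queue events so heavy requests don't pile up concurrently
    demo.launch()
```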
 