David Driscoll committed
Commit f6a647b · Parent: 4a53aae

Video to image, text change

Files changed (1):
  1. app.py (+68, -48)
app.py CHANGED
@@ -11,13 +11,13 @@ from fer import FER  # Facial emotion recognition
 # -----------------------------
 # Configuration
 # -----------------------------
-# 1) Increase skip rate
-SKIP_RATE = 15
 
-# 2) Use GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# 3) Desired input size for faster inference
 DESIRED_SIZE = (640, 480)
 
 # -----------------------------
@@ -45,16 +45,16 @@ object_detection_model.eval().to(device)  # Move model to GPU (if available)
 
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
-# If the FER library supports GPU, it may pick it up automatically.
-# Some versions allow device specification, e.g. FER(mtcnn=True, device=device).
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
 # Overlay Drawing Functions
 # -----------------------------
 def draw_posture_overlay(raw_frame, landmarks):
     for (x, y) in landmarks:
-        cv2.circle(raw_frame, (x, y), 4, (0, 255, 0), -1)
     return raw_frame
 
 def draw_boxes_overlay(raw_frame, boxes, color):
@@ -66,22 +66,18 @@ def draw_boxes_overlay(raw_frame, boxes, color):
 # Heavy (Synchronous) Detection Functions
 # -----------------------------
 def compute_posture_overlay(image):
-    # Convert to BGR for MediaPipe
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
-
-    # 2) Downscale before processing (optional for posture)
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
     pose_results = pose.process(frame_rgb_small)
 
-    # Scale landmarks back up to original size if needed
     if pose_results.pose_landmarks:
         landmarks = []
         for lm in pose_results.pose_landmarks.landmark:
-            # Rescale from the smaller frame to the original size
             x = int(lm.x * small_w * (w / small_w))
             y = int(lm.y * small_h * (h / small_h))
             landmarks.append((x, y))
@@ -93,9 +89,7 @@ def compute_posture_overlay(image):
     return landmarks, text
 
 def compute_emotion_overlay(image):
-    # Convert to BGR
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
@@ -109,7 +103,6 @@ def compute_emotion_overlay(image):
 
 def compute_objects_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
@@ -123,17 +116,13 @@
     boxes = []
     for box, score in zip(detections["boxes"], detections["scores"]):
         if score > threshold:
-            # box is in the scaled-down coordinates;
-            # you may want to scale them back to the original if needed
             boxes.append(tuple(box.int().cpu().numpy()))
-
     text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
     return boxes, text
 
 def compute_faces_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
@@ -148,8 +137,6 @@ def compute_faces_overlay(image):
             y = int(bbox.ymin * small_h)
             box_w = int(bbox.width * small_w)
             box_h = int(bbox.height * small_h)
-            # Scale bounding box coords back to original if you need full resolution
-            # E.g., x_original = int(x * (w / small_w)), etc.
             boxes.append((x, y, x + box_w, y + box_h))
         text = f"Detected {len(boxes)} face(s)"
     else:
@@ -157,13 +144,12 @@
     return boxes, text
 
 # -----------------------------
-# Main Analysis Functions
 # -----------------------------
 def analyze_posture_current(image):
     global posture_cache
     posture_cache["counter"] += 1
     current_frame = np.array(image)
-
     if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
         landmarks, text = compute_posture_overlay(image)
         posture_cache["landmarks"] = landmarks
@@ -173,24 +159,22 @@
     if posture_cache["landmarks"]:
         output = draw_posture_overlay(output, posture_cache["landmarks"])
 
-    return output, f"Posture Analysis: {posture_cache['text']}"
 
 def analyze_emotion_current(image):
     global emotion_cache
     emotion_cache["counter"] += 1
     current_frame = np.array(image)
-
     if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
         text = compute_emotion_overlay(image)
         emotion_cache["text"] = text
 
-    return current_frame, f"Emotion Analysis: {emotion_cache['text']}"
 
 def analyze_objects_current(image):
     global objects_cache
     objects_cache["counter"] += 1
     current_frame = np.array(image)
-
     if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
         boxes, text = compute_objects_overlay(image)
         objects_cache["boxes"] = boxes
@@ -199,14 +183,12 @@ def analyze_objects_current(image):
     output = current_frame.copy()
     if objects_cache["boxes"]:
         output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
-
-    return output, f"Object Detection: {objects_cache['text']}"
 
 def analyze_faces_current(image):
     global faces_cache
     faces_cache["counter"] += 1
     current_frame = np.array(image)
-
     if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
         boxes, text = compute_faces_overlay(image)
         faces_cache["boxes"] = boxes
@@ -215,8 +197,38 @@
     output = current_frame.copy()
     if faces_cache["boxes"]:
         output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
-
-    return output, f"Face Detection: {faces_cache['text']}"
 
 # -----------------------------
 # Custom CSS
@@ -252,30 +264,30 @@ body {
 """
 
 # -----------------------------
-# Create Individual Interfaces
 # -----------------------------
 posture_interface = gr.Interface(
     fn=analyze_posture_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Posture"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
     title="Posture Analysis",
     description="Detects your posture using MediaPipe.",
-    live=True  # Keep only this interface live to avoid multiple heavy computations
 )
 
 emotion_interface = gr.Interface(
     fn=analyze_emotion_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
     title="Emotion Analysis",
     description="Detects facial emotions using FER.",
-    live=False  # Turn off streaming to reduce overhead
 )
 
 objects_interface = gr.Interface(
     fn=analyze_objects_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture the Scene"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
     title="Object Detection",
     description="Detects objects using a pretrained Faster R-CNN.",
     live=False
@@ -283,19 +295,28 @@ objects_interface = gr.Interface(
 
 faces_interface = gr.Interface(
     fn=analyze_faces_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
     title="Face Detection",
     description="Detects faces using MediaPipe.",
     live=False
 )
 
 # -----------------------------
 # Create a Tabbed Interface
 # -----------------------------
 tabbed_interface = gr.TabbedInterface(
-    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface],
-    tab_names=["Posture", "Emotion", "Objects", "Faces"]
 )
 
 # -----------------------------
@@ -303,10 +324,9 @@ tabbed_interface = gr.TabbedInterface(
 # -----------------------------
 demo = gr.Blocks(css=custom_css)
 with demo:
-    gr.Markdown("<h1 class='gradio-title'>Real-Time Multi-Analysis App</h1>")
     gr.Markdown(
-        "<p class='gradio-description'>Experience a high-tech cinematic interface for real-time "
-        "analysis of your posture, emotions, objects, and faces using your webcam.</p>"
     )
     tabbed_interface.render()
 
 
 # -----------------------------
 # Configuration
 # -----------------------------
+# For image processing, always run the analysis (no frame skipping)
+SKIP_RATE = 1
 
+# Use GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+# Desired input size for faster inference
 DESIRED_SIZE = (640, 480)
 
 # -----------------------------
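Note on the new SKIP_RATE value: with SKIP_RATE = 1, the counter % SKIP_RATE == 0 test in the analyze_* functions further down is true on every call, so each uploaded image is analyzed from scratch and the per-function caches only hold the most recent result. A minimal sketch of that caching pattern, with illustrative names that are not taken from app.py:

    import numpy as np

    cache = {"counter": 0, "result": None}
    SKIP_RATE = 1  # value set by this commit; the previous code used 15 to skip webcam frames

    def heavy_detection(image):
        # Stand-in for a real detector call (MediaPipe, FER, or Faster R-CNN).
        return f"analyzed frame of shape {np.asarray(image).shape}"

    def maybe_recompute(image):
        cache["counter"] += 1
        # With SKIP_RATE = 1 this condition always holds, so nothing is ever skipped.
        if cache["counter"] % SKIP_RATE == 0 or cache["result"] is None:
            cache["result"] = heavy_detection(image)
        return cache["result"]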
 
 
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
+# Initialize the FER emotion detector
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
 # Overlay Drawing Functions
 # -----------------------------
 def draw_posture_overlay(raw_frame, landmarks):
+    # Draw circles for each landmark using lime green (BGR: (50,205,50))
     for (x, y) in landmarks:
+        cv2.circle(raw_frame, (x, y), 4, (50, 205, 50), -1)
     return raw_frame
 
 def draw_boxes_overlay(raw_frame, boxes, color):
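The body of draw_boxes_overlay is unchanged and therefore not shown in the diff; judging from how it is called, it presumably draws one rectangle per (x1, y1, x2, y2) box, roughly along these lines (a hypothetical reconstruction, not the committed code):

    def draw_boxes_overlay(raw_frame, boxes, color):
        # Draw each box onto the frame in the given color with a 2 px border.
        for (x1, y1, x2, y2) in boxes:
            cv2.rectangle(raw_frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        return raw_frame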
 
 # Heavy (Synchronous) Detection Functions
 # -----------------------------
 def compute_posture_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
     pose_results = pose.process(frame_rgb_small)
 
     if pose_results.pose_landmarks:
         landmarks = []
         for lm in pose_results.pose_landmarks.landmark:
+            # Scale landmarks back to the original image size
             x = int(lm.x * small_w * (w / small_w))
             y = int(lm.y * small_h * (h / small_h))
             landmarks.append((x, y))
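Since MediaPipe landmark coordinates are normalized to [0, 1], the small_w and small_h factors cancel, and the two scaling lines reduce to int(lm.x * w) and int(lm.y * h); the resize has no effect on the recovered coordinates. A quick check of the arithmetic with illustrative values:

    w, h = 1280, 720              # original frame size (illustrative)
    small_w, small_h = 640, 480   # DESIRED_SIZE
    lm_x, lm_y = 0.25, 0.5        # normalized landmark coordinates (illustrative)
    assert int(lm_x * small_w * (w / small_w)) == int(lm_x * w) == 320
    assert int(lm_y * small_h * (h / small_h)) == int(lm_y * h) == 360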
 
     return landmarks, text
 
 def compute_emotion_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
 
 
 def compute_objects_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
 
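The unchanged middle of compute_objects_overlay is not shown in the diff; that is where the detector actually runs. With torchvision's detection API the missing step presumably looks something like the sketch below, reusing app.py's obj_transform, device, and object_detection_model; the 0.8 threshold is an assumption, not a value visible in the diff:

    img_tensor = obj_transform(frame_rgb_small).to(device)    # HWC uint8 array -> CHW float tensor in [0, 1]
    with torch.no_grad():
        detections = object_detection_model([img_tensor])[0]  # dict with "boxes", "labels", "scores"
    threshold = 0.8                                            # assumed confidence cutoff used below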
     boxes = []
     for box, score in zip(detections["boxes"], detections["scores"]):
         if score > threshold:
             boxes.append(tuple(box.int().cpu().numpy()))
     text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
     return boxes, text
 
 def compute_faces_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
 
             y = int(bbox.ymin * small_h)
             box_w = int(bbox.width * small_w)
             box_h = int(bbox.height * small_h)
             boxes.append((x, y, x + box_w, y + box_h))
         text = f"Detected {len(boxes)} face(s)"
     else:
 
     return boxes, text
 
 # -----------------------------
+# Main Analysis Functions for Single Image
 # -----------------------------
 def analyze_posture_current(image):
     global posture_cache
     posture_cache["counter"] += 1
     current_frame = np.array(image)
     if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
         landmarks, text = compute_posture_overlay(image)
         posture_cache["landmarks"] = landmarks
 
     if posture_cache["landmarks"]:
         output = draw_posture_overlay(output, posture_cache["landmarks"])
 
+    return output, f"<div style='color: lime;'>Posture Analysis: {posture_cache['text']}</div>"
 
 def analyze_emotion_current(image):
     global emotion_cache
     emotion_cache["counter"] += 1
     current_frame = np.array(image)
     if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
         text = compute_emotion_overlay(image)
         emotion_cache["text"] = text
 
+    return current_frame, f"<div style='color: lime;'>Emotion Analysis: {emotion_cache['text']}</div>"
 
 def analyze_objects_current(image):
     global objects_cache
     objects_cache["counter"] += 1
     current_frame = np.array(image)
     if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
         boxes, text = compute_objects_overlay(image)
         objects_cache["boxes"] = boxes
 
     output = current_frame.copy()
     if objects_cache["boxes"]:
         output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
+    return output, f"<div style='color: lime;'>Object Detection: {objects_cache['text']}</div>"
 
 def analyze_faces_current(image):
     global faces_cache
     faces_cache["counter"] += 1
     current_frame = np.array(image)
     if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
         boxes, text = compute_faces_overlay(image)
         faces_cache["boxes"] = boxes
 
     output = current_frame.copy()
     if faces_cache["boxes"]:
         output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
+    return output, f"<div style='color: lime;'>Face Detection: {faces_cache['text']}</div>"
+
+def analyze_all(image):
+    # Run all analyses on the same image
+    current_frame = np.array(image).copy()
+
+    # Posture Analysis
+    landmarks, posture_text = compute_posture_overlay(image)
+    if landmarks:
+        current_frame = draw_posture_overlay(current_frame, landmarks)
+
+    # Emotion Analysis
+    emotion_text = compute_emotion_overlay(image)
+
+    # Object Detection
+    boxes_obj, objects_text = compute_objects_overlay(image)
+    if boxes_obj:
+        current_frame = draw_boxes_overlay(current_frame, boxes_obj, (255, 255, 0))
+
+    # Face Detection
+    boxes_face, faces_text = compute_faces_overlay(image)
+    if boxes_face:
+        current_frame = draw_boxes_overlay(current_frame, boxes_face, (0, 0, 255))
+
+    combined_text = (
+        f"Posture Analysis: {posture_text}<br>"
+        f"Emotion Analysis: {emotion_text}<br>"
+        f"Object Detection: {objects_text}<br>"
+        f"Face Detection: {faces_text}"
+    )
+    combined_text_html = f"<div style='color: lime;'>{combined_text}</div>"
+    return current_frame, combined_text_html
 
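A quick way to exercise the new analyze_all function outside the Gradio UI, for example in a Python session that has already executed app.py (the file names below are placeholders):

    from PIL import Image
    import cv2

    img = Image.open("sample.jpg").convert("RGB")   # placeholder input image
    annotated, summary_html = analyze_all(img)      # RGB numpy array plus an HTML summary string
    cv2.imwrite("annotated.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
    print(summary_html)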
 # -----------------------------
 # Custom CSS
 
 """
 
 # -----------------------------
+# Create Individual Interfaces for Image Processing
 # -----------------------------
 posture_interface = gr.Interface(
     fn=analyze_posture_current,
+    inputs=gr.Image(label="Upload an Image for Posture Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Posture Analysis")],
     title="Posture Analysis",
     description="Detects your posture using MediaPipe.",
+    live=False
 )
 
 emotion_interface = gr.Interface(
     fn=analyze_emotion_current,
+    inputs=gr.Image(label="Upload an Image for Emotion Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Emotion Analysis")],
     title="Emotion Analysis",
     description="Detects facial emotions using FER.",
+    live=False
 )
 
 objects_interface = gr.Interface(
     fn=analyze_objects_current,
+    inputs=gr.Image(label="Upload an Image for Object Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Object Detection")],
     title="Object Detection",
     description="Detects objects using a pretrained Faster R-CNN.",
     live=False
 
 faces_interface = gr.Interface(
     fn=analyze_faces_current,
+    inputs=gr.Image(label="Upload an Image for Face Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Face Detection")],
     title="Face Detection",
     description="Detects faces using MediaPipe.",
     live=False
 )
 
+all_interface = gr.Interface(
+    fn=analyze_all,
+    inputs=gr.Image(label="Upload an Image for All Inferences"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Combined Analysis")],
+    title="All Inferences",
+    description="Runs posture, emotion, object, and face detection all at once.",
+    live=False
+)
+
 # -----------------------------
 # Create a Tabbed Interface
 # -----------------------------
 tabbed_interface = gr.TabbedInterface(
+    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface, all_interface],
+    tab_names=["Posture", "Emotion", "Objects", "Faces", "All Inferences"]
 )
 
 # -----------------------------
 
 # -----------------------------
 demo = gr.Blocks(css=custom_css)
 with demo:
+    gr.Markdown("<h1 class='gradio-title'>Multi-Analysis Image App</h1>")
     gr.Markdown(
+        "<p class='gradio-description'>Upload an image to run analysis for posture, emotions, objects, and faces.</p>"
     )
     tabbed_interface.render()
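The diff ends just after tabbed_interface.render(), so the launch call is not visible; for the Space to serve the tabbed interface, app.py presumably ends with something like:

    demo.launch()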