David Driscoll committed on
Commit f3de933 · Parent: 134b727

Emotion fix
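
This commit rewires the Emotion tab: the classifier is now loaded from the "nateraw/fer" repo (instead of "nateraw/facial-expression-recognition") and the prediction is reported together with a softmax confidence score. Condensed from the diff below, the new emotion path looks roughly like the following sketch; the classify_face wrapper is illustrative only and is not a function defined in app.py.

# Sketch of the emotion path introduced in this commit; classify_face is an
# illustrative helper, not part of app.py.
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
emotion_processor = AutoImageProcessor.from_pretrained("nateraw/fer")
emotion_model = AutoModelForImageClassification.from_pretrained("nateraw/fer").to(device).eval()

def classify_face(face_image: Image.Image) -> str:
    """Return 'label (confidence)' for an already-cropped RGB face image."""
    inputs = emotion_processor(face_image, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)
    score, pred = torch.max(probs, dim=-1)
    return f"{emotion_model.config.id2label[pred.item()]} ({score.item():.2f})"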

Files changed (1)
  1. app.py +236 -229
app.py CHANGED
@@ -2,242 +2,269 @@ import gradio as gr
  import cv2
  import numpy as np
  import torch
  from PIL import Image
  import mediapipe as mp

- from transformers import (
-     AutoFeatureExtractor,
-     AutoModel,
-     AutoImageProcessor,
-     AutoModelForImageClassification,
-     AutoModelForSemanticSegmentation
- )

  # -----------------------------
- # Configuration & Device Setup
  # -----------------------------
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  DESIRED_SIZE = (640, 480)

  # -----------------------------
- # Initialize Mediapipe Face Detection
  # -----------------------------
- mp_face_detection = mp.solutions.face_detection
- face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

  # -----------------------------
- # Load New Models from Hugging Face
  # -----------------------------

- # 1. Facial Recognition & Identification (facebook/dino-vitb16)
- facial_recognition_extractor = AutoFeatureExtractor.from_pretrained("facebook/dino-vitb16")
- facial_recognition_model = AutoModel.from_pretrained("facebook/dino-vitb16")
- facial_recognition_model.to(device)
- facial_recognition_model.eval()

- # Create a dummy database for demonstration (embeddings of dimension 768 assumed)
- dummy_database = {
-     "Alice": torch.randn(768).to(device),
-     "Bob": torch.randn(768).to(device)
- }

- # 2. Emotion Detection (nateraw/facial-expression-recognition)
- emotion_processor = AutoImageProcessor.from_pretrained("nateraw/facial-expression-recognition")
- emotion_model = AutoModelForImageClassification.from_pretrained("nateraw/facial-expression-recognition")
  emotion_model.to(device)
  emotion_model.eval()

- # 3. Age & Gender Prediction (oayu/age-gender-estimation)
- age_gender_processor = AutoImageProcessor.from_pretrained("oayu/age-gender-estimation")
- age_gender_model = AutoModelForImageClassification.from_pretrained("oayu/age-gender-estimation")
- age_gender_model.to(device)
- age_gender_model.eval()

- # 4. Face Parsing (hila-chefer/face-parsing)
- face_parsing_processor = AutoImageProcessor.from_pretrained("hila-chefer/face-parsing")
- face_parsing_model = AutoModelForSemanticSegmentation.from_pretrained("hila-chefer/face-parsing")
- face_parsing_model.to(device)
- face_parsing_model.eval()

- # 5. Deepfake Detection (microsoft/FaceForensics)
- deepfake_processor = AutoImageProcessor.from_pretrained("microsoft/FaceForensics")
- deepfake_model = AutoModelForImageClassification.from_pretrained("microsoft/FaceForensics")
- deepfake_model.to(device)
- deepfake_model.eval()

  # -----------------------------
- # Helper Functions for New Inferences
  # -----------------------------
-
- def compute_facial_recognition(image):
-     """
-     Detects a face using MediaPipe, crops it, and computes its embedding with DINO-ViT.
-     Compares the embedding against a dummy database to "identify" the person.
-     """
-     frame = np.array(image)
-     frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-     frame_resized = cv2.resize(frame_bgr, DESIRED_SIZE)
-     frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
-
-     face_results = face_detection.process(frame_rgb)
-     if face_results.detections:
-         detection = face_results.detections[0]
-         bbox = detection.location_data.relative_bounding_box
-         h, w, _ = frame_rgb.shape
-         x = int(bbox.xmin * w)
-         y = int(bbox.ymin * h)
-         box_w = int(bbox.width * w)
-         box_h = int(bbox.height * h)
-         face_crop = frame_rgb[y:y+box_h, x:x+box_w]
-         face_image = Image.fromarray(face_crop)
-
-         inputs = facial_recognition_extractor(face_image, return_tensors="pt").to(device)
-         with torch.no_grad():
-             outputs = facial_recognition_model(**inputs)
-         # Use mean pooling over the last hidden state to get an embedding vector
-         embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
-
-         # Compare against dummy database using cosine similarity
-         best_score = -1
-         best_name = "Unknown"
-         for name, db_emb in dummy_database.items():
-             cos_sim = torch.nn.functional.cosine_similarity(embeddings, db_emb, dim=0)
-             if cos_sim > best_score:
-                 best_score = cos_sim
-                 best_name = name
-         threshold = 0.7  # dummy threshold for identification
-         if best_score > threshold:
-             result = f"Identified as {best_name} (sim: {best_score:.2f})"
-         else:
-             result = f"No match found (best: {best_name}, sim: {best_score:.2f})"
-         return face_crop, result
      else:
-         return frame, "No face detected"

- def compute_emotion_detection(image):
      """
-     Detects a face, crops it, and classifies the facial expression.
      """
-     frame = np.array(image)
-     frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-     frame_resized = cv2.resize(frame_bgr, DESIRED_SIZE)
-     frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)

-     face_results = face_detection.process(frame_rgb)
      if face_results.detections:
          detection = face_results.detections[0]
          bbox = detection.location_data.relative_bounding_box
-         h, w, _ = frame_rgb.shape
          x = int(bbox.xmin * w)
          y = int(bbox.ymin * h)
          box_w = int(bbox.width * w)
          box_h = int(bbox.height * h)
-         face_crop = frame_rgb[y:y+box_h, x:x+box_w]
          face_image = Image.fromarray(face_crop)

          inputs = emotion_processor(face_image, return_tensors="pt").to(device)
          with torch.no_grad():
              outputs = emotion_model(**inputs)
          logits = outputs.logits
-         pred = logits.argmax(-1).item()
-         label = emotion_model.config.id2label[pred]
-         return face_crop, f"Emotion: {label}"
      else:
-         return frame, "No face detected"

- def compute_age_gender(image):
-     """
-     Detects a face, crops it, and predicts the age & gender.
-     """
-     frame = np.array(image)
-     frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-     frame_resized = cv2.resize(frame_bgr, DESIRED_SIZE)
-     frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
-
-     face_results = face_detection.process(frame_rgb)
-     if face_results.detections:
-         detection = face_results.detections[0]
-         bbox = detection.location_data.relative_bounding_box
-         h, w, _ = frame_rgb.shape
-         x = int(bbox.xmin * w)
-         y = int(bbox.ymin * h)
-         box_w = int(bbox.width * w)
-         box_h = int(bbox.height * h)
-         face_crop = frame_rgb[y:y+box_h, x:x+box_w]
-         face_image = Image.fromarray(face_crop)
-
-         inputs = age_gender_processor(face_image, return_tensors="pt").to(device)
-         with torch.no_grad():
-             outputs = age_gender_model(**inputs)
-         logits = outputs.logits
-         pred = logits.argmax(-1).item()
-         label = age_gender_model.config.id2label[pred]
-         return face_crop, f"Age & Gender: {label}"
-     else:
-         return frame, "No face detected"
-
- def compute_face_parsing(image):
-     """
-     Runs face parsing (segmentation) on the provided image.
-     """
-     image_pil = Image.fromarray(np.array(image))
-     inputs = face_parsing_processor(image_pil, return_tensors="pt").to(device)
      with torch.no_grad():
-         outputs = face_parsing_model(**inputs)
-     logits = outputs.logits  # shape: (batch, num_labels, H, W)
-     segmentation = logits.argmax(dim=1)[0].cpu().numpy()
-     # For visualization, we apply a color map to the segmentation mask.
-     segmentation_norm = np.uint8(255 * segmentation / (segmentation.max() + 1e-5))
-     segmentation_color = cv2.applyColorMap(segmentation_norm, cv2.COLORMAP_JET)
-     return segmentation_color, "Face Parsing completed"

- def compute_deepfake_detection(image):
-     """
-     Runs deepfake detection on the image.
-     """
-     image_pil = Image.fromarray(np.array(image))
-     inputs = deepfake_processor(image_pil, return_tensors="pt").to(device)
-     with torch.no_grad():
-         outputs = deepfake_model(**inputs)
-     logits = outputs.logits
-     pred = logits.argmax(-1).item()
-     label = deepfake_model.config.id2label[pred]
-     return np.array(image), f"Deepfake Detection: {label}"

  # -----------------------------
- # Analysis Functions (Wrapping Inference & Green Text)
  # -----------------------------

- def analyze_facial_recognition(image):
-     annotated_face, result = compute_facial_recognition(image)
-     return annotated_face, f"<div style='color: lime !important;'>Facial Recognition: {result}</div>"
-
- def analyze_emotion_detection(image):
-     face_crop, result = compute_emotion_detection(image)
-     return face_crop, f"<div style='color: lime !important;'>{result}</div>"

- def analyze_age_gender(image):
-     face_crop, result = compute_age_gender(image)
-     return face_crop, f"<div style='color: lime !important;'>{result}</div>"

- def analyze_face_parsing(image):
-     segmentation, result = compute_face_parsing(image)
-     return segmentation, f"<div style='color: lime !important;'>{result}</div>"

- def analyze_deepfake_detection(image):
-     output, result = compute_deepfake_detection(image)
-     return output, f"<div style='color: lime !important;'>{result}</div>"

  # -----------------------------
- # Custom CSS (All Text in Green)
  # -----------------------------
  custom_css = """
  @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
  body {
      background-color: #0e0e0e;
      font-family: 'Orbitron', sans-serif;
-     margin: 0;
-     padding: 0;
      color: #32CD32;
  }
  .gradio-container {
@@ -261,85 +288,65 @@ input, button, .output {
  """

  # -----------------------------
- # Create Gradio Interfaces for New Models
  # -----------------------------
- facial_recognition_interface = gr.Interface(
-     fn=analyze_facial_recognition,
-     inputs=gr.Image(label="Upload a Face Image for Facial Recognition"),
-     outputs=[gr.Image(type="numpy", label="Cropped Face / Embedding Visualization"),
-              gr.HTML(label="Facial Recognition Result")],
-     title="Facial Recognition & Identification",
-     description="Extracts facial embeddings using facebook/dino-vitb16 and identifies the face by comparing against a dummy database.",
      live=False
  )

  emotion_interface = gr.Interface(
-     fn=analyze_emotion_detection,
-     inputs=gr.Image(label="Upload a Face Image for Emotion Detection"),
-     outputs=[gr.Image(type="numpy", label="Cropped Face"),
-              gr.HTML(label="Emotion Detection")],
-     title="Emotion Detection",
-     description="Classifies the facial expression using nateraw/facial-expression-recognition.",
      live=False
  )

- age_gender_interface = gr.Interface(
-     fn=analyze_age_gender,
-     inputs=gr.Image(label="Upload a Face Image for Age & Gender Prediction"),
-     outputs=[gr.Image(type="numpy", label="Cropped Face"),
-              gr.HTML(label="Age & Gender Prediction")],
-     title="Age & Gender Prediction",
-     description="Predicts age and gender from the face using oayu/age-gender-estimation.",
      live=False
  )

- face_parsing_interface = gr.Interface(
-     fn=analyze_face_parsing,
-     inputs=gr.Image(label="Upload a Face Image for Face Parsing"),
-     outputs=[gr.Image(type="numpy", label="Segmentation Overlay"),
-              gr.HTML(label="Face Parsing")],
-     title="Face Parsing",
-     description="Segments face regions (eyes, nose, lips, hair, etc.) using hila-chefer/face-parsing.",
      live=False
  )

- deepfake_interface = gr.Interface(
-     fn=analyze_deepfake_detection,
-     inputs=gr.Image(label="Upload an Image for Deepfake Detection"),
-     outputs=[gr.Image(type="numpy", label="Input Image"),
-              gr.HTML(label="Deepfake Detection")],
-     title="Deepfake Detection",
-     description="Detects manipulated or deepfake images using microsoft/FaceForensics.",
      live=False
  )

- # -----------------------------
- # Create a Tabbed Interface
- # -----------------------------
  tabbed_interface = gr.TabbedInterface(
-     interface_list=[
-         facial_recognition_interface,
-         emotion_interface,
-         age_gender_interface,
-         face_parsing_interface,
-         deepfake_interface
-     ],
-     tab_names=[
-         "Facial Recognition",
-         "Emotion Detection",
-         "Age & Gender",
-         "Face Parsing",
-         "Deepfake Detection"
-     ]
  )

  # -----------------------------
- # Wrap in a Blocks Layout & Launch
  # -----------------------------
  demo = gr.Blocks(css=custom_css)
  with demo:
-     gr.Markdown("<h1 class='gradio-title' style='color: #32CD32;'>Multi-Analysis Face App</h1>")
-     gr.Markdown("<p class='gradio-description' style='color: #32CD32;'>Upload an image to run advanced face analysis using state-of-the-art Hugging Face models.</p>")
      tabbed_interface.render()

  if __name__ == "__main__":

@@ -2,242 +2,269 @@ import gradio as gr
  import cv2
  import numpy as np
  import torch
+ from torchvision import models, transforms
+ from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
  from PIL import Image
  import mediapipe as mp

+ # Hugging Face imports for emotion detection
+ from transformers import AutoImageProcessor, AutoModelForImageClassification

  # -----------------------------
+ # Configuration
  # -----------------------------
+ SKIP_RATE = 1  # For image processing, always run the analysis
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  DESIRED_SIZE = (640, 480)

  # -----------------------------
+ # Global caches for overlay info and frame counters
  # -----------------------------
+ posture_cache = {"landmarks": None, "text": "Initializing...", "counter": 0}
+ emotion_cache = {"text": "Initializing...", "counter": 0}
+ objects_cache = {"boxes": None, "text": "Initializing...", "object_list_text": "", "counter": 0}
+ faces_cache = {"boxes": None, "text": "Initializing...", "counter": 0}

  # -----------------------------
+ # Initialize Models and Helpers
  # -----------------------------
+ mp_pose = mp.solutions.pose
+ pose = mp_pose.Pose()
+ mp_drawing = mp.solutions.drawing_utils

+ mp_face_detection = mp.solutions.face_detection
+ face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

+ object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
+     weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
+ )
+ object_detection_model.eval().to(device)
+ obj_transform = transforms.Compose([transforms.ToTensor()])

+ # Initialize the Hugging Face emotion detection model.
+ # (Using the public "nateraw/fer" repo to mimic expression recognition.)
+ emotion_processor = AutoImageProcessor.from_pretrained("nateraw/fer")
+ emotion_model = AutoModelForImageClassification.from_pretrained("nateraw/fer")
  emotion_model.to(device)
  emotion_model.eval()

+ # Retrieve object categories from model weights metadata
+ object_categories = FasterRCNN_ResNet50_FPN_Weights.DEFAULT.meta["categories"]

+ # -----------------------------
+ # Overlay Drawing Functions
+ # -----------------------------
+ def draw_posture_overlay(raw_frame, landmarks):
+     # Draw connector lines using MediaPipe's POSE_CONNECTIONS
+     for connection in mp_pose.POSE_CONNECTIONS:
+         start_idx, end_idx = connection
+         if start_idx < len(landmarks) and end_idx < len(landmarks):
+             start_point = landmarks[start_idx]
+             end_point = landmarks[end_idx]
+             cv2.line(raw_frame, start_point, end_point, (50, 205, 50), 2)
+     # Draw landmark points in lime green (BGR: (50,205,50))
+     for (x, y) in landmarks:
+         cv2.circle(raw_frame, (x, y), 4, (50, 205, 50), -1)
+     return raw_frame

+ def draw_boxes_overlay(raw_frame, boxes, color):
+     for (x1, y1, x2, y2) in boxes:
+         cv2.rectangle(raw_frame, (x1, y1), (x2, y2), color, 2)
+     return raw_frame

  # -----------------------------
+ # Heavy (Synchronous) Detection Functions
  # -----------------------------
+ def compute_posture_overlay(image):
+     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+     h, w, _ = frame_bgr.shape
+     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+     small_h, small_w, _ = frame_bgr_small.shape
+     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+     pose_results = pose.process(frame_rgb_small)
+     if pose_results.pose_landmarks:
+         landmarks = []
+         for lm in pose_results.pose_landmarks.landmark:
+             # Scale landmarks back to the original image size
+             x = int(lm.x * small_w * (w / small_w))
+             y = int(lm.y * small_h * (h / small_h))
+             landmarks.append((x, y))
+         text = "Posture detected"
      else:
+         landmarks = []
+         text = "No posture detected"
+     return landmarks, text

+ def compute_emotion_overlay(image):
      """
+     This function mimics the original FER-based expression recognition,
+     but uses a Hugging Face emotion model instead.
      """
+     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)

+     # Use MediaPipe to detect a face and crop it
+     face_results = face_detection.process(frame_rgb_small)
      if face_results.detections:
          detection = face_results.detections[0]
          bbox = detection.location_data.relative_bounding_box
+         h, w, _ = frame_rgb_small.shape
          x = int(bbox.xmin * w)
          y = int(bbox.ymin * h)
          box_w = int(bbox.width * w)
          box_h = int(bbox.height * h)
+         face_crop = frame_rgb_small[y:y+box_h, x:x+box_w]
          face_image = Image.fromarray(face_crop)

+         # Process face crop with the Hugging Face emotion model
          inputs = emotion_processor(face_image, return_tensors="pt").to(device)
          with torch.no_grad():
              outputs = emotion_model(**inputs)
          logits = outputs.logits
+         probs = torch.softmax(logits, dim=-1)
+         score, pred = torch.max(probs, dim=-1)
+         label = emotion_model.config.id2label[pred.item()]
+         text = f"{label} ({score.item():.2f})"
      else:
+         text = "No face detected"
+     return text

+ def compute_objects_overlay(image):
+     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+     image_pil = Image.fromarray(frame_rgb_small)
+     img_tensor = obj_transform(image_pil).to(device)
      with torch.no_grad():
+         detections = object_detection_model([img_tensor])[0]
+     threshold = 0.8
+     boxes = []
+     object_list = []
+     for box, score, label in zip(detections["boxes"], detections["scores"], detections["labels"]):
+         if score > threshold:
+             boxes.append(tuple(box.int().cpu().numpy()))
+             label_idx = int(label)
+             label_name = object_categories[label_idx] if label_idx < len(object_categories) else "Unknown"
+             object_list.append(f"{label_name} ({score:.2f})")
+     text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
+     object_list_text = " | ".join(object_list) if object_list else "None"
+     return boxes, text, object_list_text

+ def compute_faces_overlay(image):
+     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+     h, w, _ = frame_bgr.shape
+     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+     small_h, small_w, _ = frame_bgr_small.shape
+     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+     face_results = face_detection.process(frame_rgb_small)
+     boxes = []
+     if face_results.detections:
+         for detection in face_results.detections:
+             bbox = detection.location_data.relative_bounding_box
+             x = int(bbox.xmin * small_w)
+             y = int(bbox.ymin * small_h)
+             box_w = int(bbox.width * small_w)
+             box_h = int(bbox.height * small_h)
+             boxes.append((x, y, x + box_w, y + box_h))
+         text = f"Detected {len(boxes)} face(s)"
+     else:
+         text = "No faces detected"
+     return boxes, text

176
+ # Main Analysis Functions for Single Image
177
  # -----------------------------
178
+ def analyze_posture_current(image):
179
+ global posture_cache
180
+ posture_cache["counter"] += 1
181
+ current_frame = np.array(image)
182
+ if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
183
+ landmarks, text = compute_posture_overlay(image)
184
+ posture_cache["landmarks"] = landmarks
185
+ posture_cache["text"] = text
186
+ output = current_frame.copy()
187
+ if posture_cache["landmarks"]:
188
+ output = draw_posture_overlay(output, posture_cache["landmarks"])
189
+ return output, f"<div style='color: lime !important;'>Posture Analysis: {posture_cache['text']}</div>"
190
 
191
+ def analyze_emotion_current(image):
192
+ global emotion_cache
193
+ emotion_cache["counter"] += 1
194
+ current_frame = np.array(image)
195
+ if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
196
+ text = compute_emotion_overlay(image)
197
+ emotion_cache["text"] = text
198
+ return current_frame, f"<div style='color: lime !important;'>Emotion Analysis: {emotion_cache['text']}</div>"
199
 
200
+ def analyze_objects_current(image):
201
+ global objects_cache
202
+ objects_cache["counter"] += 1
203
+ current_frame = np.array(image)
204
+ if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
205
+ boxes, text, object_list_text = compute_objects_overlay(image)
206
+ objects_cache["boxes"] = boxes
207
+ objects_cache["text"] = text
208
+ objects_cache["object_list_text"] = object_list_text
209
+ output = current_frame.copy()
210
+ if objects_cache["boxes"]:
211
+ output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
212
+ combined_text = f"Object Detection: {objects_cache['text']}<br>Details: {objects_cache['object_list_text']}"
213
+ return output, f"<div style='color: lime !important;'>{combined_text}</div>"
214
 
215
+ def analyze_faces_current(image):
216
+ global faces_cache
217
+ faces_cache["counter"] += 1
218
+ current_frame = np.array(image)
219
+ if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
220
+ boxes, text = compute_faces_overlay(image)
221
+ faces_cache["boxes"] = boxes
222
+ faces_cache["text"] = text
223
+ output = current_frame.copy()
224
+ if faces_cache["boxes"]:
225
+ output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
226
+ return output, f"<div style='color: lime !important;'>Face Detection: {faces_cache['text']}</div>"
227
 
228
+ def analyze_all(image):
229
+ current_frame = np.array(image).copy()
230
+ # Posture Analysis
231
+ landmarks, posture_text = compute_posture_overlay(image)
232
+ if landmarks:
233
+ current_frame = draw_posture_overlay(current_frame, landmarks)
234
+ # Emotion Analysis
235
+ emotion_text = compute_emotion_overlay(image)
236
+ # Object Detection
237
+ boxes_obj, objects_text, object_list_text = compute_objects_overlay(image)
238
+ if boxes_obj:
239
+ current_frame = draw_boxes_overlay(current_frame, boxes_obj, (255, 255, 0))
240
+ # Face Detection
241
+ boxes_face, faces_text = compute_faces_overlay(image)
242
+ if boxes_face:
243
+ current_frame = draw_boxes_overlay(current_frame, boxes_face, (0, 0, 255))
244
+ # Combined Analysis Text
245
+ combined_text = (
246
+ f"<b>Posture Analysis:</b> {posture_text}<br>"
247
+ f"<b>Emotion Analysis:</b> {emotion_text}<br>"
248
+ f"<b>Object Detection:</b> {objects_text}<br>"
249
+ f"<b>Detected Objects:</b> {object_list_text}<br>"
250
+ f"<b>Face Detection:</b> {faces_text}"
251
+ )
252
+ if object_list_text and object_list_text != "None":
253
+ description_text = f"Image Description: The scene features {object_list_text}."
254
+ else:
255
+ description_text = "Image Description: No prominent objects detected."
256
+ combined_text += f"<br><br><div style='border:1px solid lime; padding:10px; box-shadow: 0 0 10px lime;'><b>{description_text}</b></div>"
257
+ combined_text_html = f"<div style='color: lime !important;'>{combined_text}</div>"
258
+ return current_frame, combined_text_html
259
 
260
  # -----------------------------
+ # Custom CSS (High-Tech Neon Theme)
  # -----------------------------
  custom_css = """
  @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
  body {
      background-color: #0e0e0e;
      font-family: 'Orbitron', sans-serif;
      color: #32CD32;
  }
  .gradio-container {
@@ -261,85 +288,65 @@ input, button, .output {
  """

  # -----------------------------
+ # Create Individual Interfaces for Image Processing
  # -----------------------------
+ posture_interface = gr.Interface(
+     fn=analyze_posture_current,
+     inputs=gr.Image(label="Upload an Image for Posture Analysis"),
+     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Posture Analysis")],
+     title="Posture",
+     description="Detects your posture using MediaPipe with connector lines.",
      live=False
  )

  emotion_interface = gr.Interface(
+     fn=analyze_emotion_current,
+     inputs=gr.Image(label="Upload an Image for Emotion Analysis"),
+     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Emotion Analysis")],
+     title="Emotion",
+     description="Detects facial emotions using a Hugging Face model.",
      live=False
  )

+ objects_interface = gr.Interface(
+     fn=analyze_objects_current,
+     inputs=gr.Image(label="Upload an Image for Object Detection"),
+     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Object Detection")],
+     title="Objects",
+     description="Detects objects using a pretrained Faster R-CNN.",
      live=False
  )

+ faces_interface = gr.Interface(
+     fn=analyze_faces_current,
+     inputs=gr.Image(label="Upload an Image for Face Detection"),
+     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Face Detection")],
+     title="Faces",
+     description="Detects faces using MediaPipe.",
      live=False
  )

+ all_interface = gr.Interface(
+     fn=analyze_all,
+     inputs=gr.Image(label="Upload an Image for All Inferences"),
+     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Combined Analysis")],
+     title="All Inferences",
+     description="Runs posture, emotion, object, and face detection all at once.",
      live=False
  )

  tabbed_interface = gr.TabbedInterface(
+     interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface, all_interface],
+     tab_names=["Posture", "Emotion", "Objects", "Faces", "All Inferences"]
  )

  # -----------------------------
+ # Wrap in a Blocks Layout and Launch
  # -----------------------------
  demo = gr.Blocks(css=custom_css)
  with demo:
+     gr.Markdown("<h1 class='gradio-title' style='color: #32CD32;'>Multi-Analysis Image App</h1>")
+     gr.Markdown("<p class='gradio-description' style='color: #32CD32;'>Upload an image to run high-tech analysis for posture, emotions, objects, and faces.</p>")
      tabbed_interface.render()

  if __name__ == "__main__":
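
A quick way to sanity-check the fix without launching the UI is to import the updated module and call the emotion wrapper directly. A sketch, assuming a local test image at a placeholder path ("face.jpg") and noting that importing app.py loads MediaPipe and downloads the models:

# Manual check of the new emotion path (sketch; "face.jpg" is a placeholder path).
from PIL import Image
import app  # the app.py updated in this commit

img = Image.open("face.jpg").convert("RGB")
frame, html = app.analyze_emotion_current(img)
print(html)  # lime-colored <div> containing "Emotion Analysis: <label> (<confidence>)"

frame_all, combined_html = app.analyze_all(img)
print(combined_html)  # posture, emotion, object, and face results combined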