David Driscoll committed on
Commit d4ac8c5 · 1 Parent(s): 02a025d

Update app

Files changed (1): app.py (+29 -19)
app.py CHANGED
@@ -3,6 +3,7 @@ import cv2
 import numpy as np
 import torch
 from torchvision import models, transforms
+from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
 from PIL import Image
 import mediapipe as mp
 from fer import FER # Facial emotion recognition
@@ -21,7 +22,9 @@ mp_face_detection = mp.solutions.face_detection
 face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
 
 # Object Detection Model: Faster R-CNN (pretrained on COCO)
-object_detection_model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
+    weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
+)
 object_detection_model.eval()
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
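
Note: pretrained=True has been deprecated since torchvision 0.13 in favor of the multi-weight API adopted in this hunk. Besides silencing the deprecation warning, the weights enum bundles the preprocessing pipeline and the COCO category names matching the checkpoint. A minimal sketch, assuming torchvision 0.13+:

from torchvision import models
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = models.detection.fasterrcnn_resnet50_fpn(weights=weights).eval()

# Preprocessing transforms the checkpoint was trained with
preprocess = weights.transforms()

# Human-readable COCO class names, useful for decoding predictions
categories = weights.meta["categories"]
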
@@ -38,7 +41,7 @@ def analyze_posture(frame_rgb, output_frame):
     posture_text = "No posture detected"
     if pose_results.pose_landmarks:
         posture_text = "Posture detected"
-        # Draw the pose landmarks on the output image (convert back to BGR for OpenCV)
+        # Draw the pose landmarks on the output image
         mp_drawing.draw_landmarks(
             output_frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
             mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
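
Note: this hunk only trims the stale "(convert back to BGR for OpenCV)" remark; the drawing code is unchanged. One related consideration: since the app now analyzes a single extracted frame (see the rewritten analyze_webcam below), MediaPipe Pose is better constructed with static_image_mode=True, which skips cross-frame landmark tracking. A sketch, not part of this commit:

import mediapipe as mp

mp_pose = mp.solutions.pose
# Treat every call as an independent still image rather than a video
# stream, matching the one-frame-per-request design of analyze_webcam.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5)
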
@@ -65,7 +68,7 @@ def analyze_objects(frame_rgb, output_frame):
     img_tensor = obj_transform(image_pil)
     with torch.no_grad():
         detections = object_detection_model([img_tensor])[0]
-
+
     threshold = 0.8
     detected_boxes = detections["boxes"][detections["scores"] > threshold]
     for box in detected_boxes:
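
Note: this hunk is a whitespace-only change. For context, the 0.8 threshold keeps only high-confidence detections; the same detections dict also carries "labels" and "scores", so the boxes could be annotated with class names. A hedged sketch (detections and output_frame are assumed from the surrounding code):

import cv2
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

categories = FasterRCNN_ResNet50_FPN_Weights.DEFAULT.meta["categories"]
keep = detections["scores"] > 0.8
for box, label, score in zip(detections["boxes"][keep],
                             detections["labels"][keep],
                             detections["scores"][keep]):
    x1, y1, x2, y2 = map(int, box.tolist())
    cv2.rectangle(output_frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
    cv2.putText(output_frame, f"{categories[int(label)]} {float(score):.2f}",
                (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                (0, 255, 255), 2)
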
@@ -94,26 +97,33 @@ def analyze_faces(frame_rgb, output_frame):
 # Main Analysis Function
 # -----------------------------
 
-def analyze_webcam(frame):
+def analyze_webcam(video_path):
     """
-    Runs posture analysis, facial emotion analysis, object detection, and face detection
-    on the given webcam frame. Returns an annotated image and a textual summary.
+    Receives a video file from the webcam, extracts one frame,
+    then runs posture analysis, facial emotion detection, object detection,
+    and face detection on that frame.
+    Returns an annotated image and a textual summary.
     """
-    if frame is None:
-        return None, "No frame provided."
-
-    # The input frame is in BGR (as from OpenCV). Create a copy for drawing.
+    # Open the video file (the webcam stream is saved as a temporary file)
+    cap = cv2.VideoCapture(video_path)
+    success, frame = cap.read()
+    cap.release()
+
+    if not success:
+        return None, "Could not read a frame from the video."
+
+    # Create a copy for drawing annotations
     output_frame = frame.copy()
-
-    # Convert frame to RGB for analysis
+
+    # Convert frame to RGB for certain analyses
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
+
     # Run analyses
     posture_result = analyze_posture(frame_rgb, output_frame)
     emotion_result = analyze_emotion(frame)
     object_result = analyze_objects(frame_rgb, output_frame)
     face_result = analyze_faces(frame_rgb, output_frame)
-
+
     # Compose the result summary text
     summary = (
         f"Posture Analysis: {posture_result}\n"
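
Note: the rewrite reflects that Gradio delivers webcam video as a temporary file path, so the function now decodes one frame with cv2.VideoCapture instead of receiving a numpy array. Reading only the first frame is the simplest option, but the opening frame of a webcam clip is often dark while the camera adjusts; one possible refinement (frame counts can be unreliable for some containers) is to seek to the middle of the clip. A sketch, not part of this commit:

import cv2

def read_middle_frame(video_path):
    """Return (success, frame) for the middle frame of a video file."""
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if frame_count > 1:
        # Seek before decoding so only a single frame is actually read
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count // 2)
    success, frame = cap.read()
    cap.release()
    return success, frame
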
@@ -121,25 +131,25 @@ def analyze_webcam(frame):
         f"Object Detection: {object_result}\n"
         f"Face Detection: {face_result}"
     )
-
-    # Optionally, overlay some of the summary text on the image
+
+    # Optionally, overlay some summary text on the image
     cv2.putText(output_frame, f"Emotion: {emotion_result}", (10, 30),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
     cv2.putText(output_frame, f"Objects: {object_result}", (10, 70),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
     cv2.putText(output_frame, f"Faces: {face_result}", (10, 110),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-
+
     return output_frame, summary
 
 # -----------------------------
 # Gradio Interface Setup
 # -----------------------------
 
-# We output both an image (with drawn annotations) and a text summary.
+# Using gr.Video to capture webcam input in Gradio 5.x
 interface = gr.Interface(
     fn=analyze_webcam,
-    inputs=gr.Image(source="webcam", streaming=True, label="Webcam Feed"),
+    inputs=gr.Video(source="webcam", streaming=True, label="Webcam Feed"),
     outputs=[
         gr.Image(type="numpy", label="Annotated Output"),
         gr.Textbox(label="Analysis Summary")
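
Note: the new comment says Gradio 5.x, but from Gradio 4.0 onward the source= argument was replaced by the plural sources= (a list), so gr.Video(source="webcam") would raise a TypeError there. A minimal sketch of the constructor under that assumption; streaming=True is omitted because streaming webcam input may not be supported on a Video component:

import gradio as gr

interface = gr.Interface(
    fn=analyze_webcam,  # assumed from the diff above
    inputs=gr.Video(sources=["webcam"], label="Webcam Feed"),
    outputs=[
        gr.Image(type="numpy", label="Annotated Output"),
        gr.Textbox(label="Analysis Summary"),
    ],
)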