Spaces:

ddriscoll
/

SOC3242-01_Group_3_Interactive

Sleeping

App Files Community

David Driscoll committed on Feb 16

Commit

d4ac8c5

1 Parent(s): 02a025d

Update app

Browse files

Files changed (1) hide show

app.py +29 -19

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import cv2
 import numpy as np
 import torch
 from torchvision import models, transforms
 from PIL import Image
 import mediapipe as mp
 from fer import FER  # Facial emotion recognition
@@ -21,7 +22,9 @@ mp_face_detection = mp.solutions.face_detection
 face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
 # Object Detection Model: Faster R-CNN (pretrained on COCO)
-object_detection_model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
 object_detection_model.eval()
 obj_transform = transforms.Compose([transforms.ToTensor()])
@@ -38,7 +41,7 @@ def analyze_posture(frame_rgb, output_frame):
     posture_text = "No posture detected"
     if pose_results.pose_landmarks:
         posture_text = "Posture detected"
-        # Draw the pose landmarks on the output image (convert back to BGR for OpenCV)
         mp_drawing.draw_landmarks(
             output_frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
             mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
@@ -65,7 +68,7 @@ def analyze_objects(frame_rgb, output_frame):
     img_tensor = obj_transform(image_pil)
     with torch.no_grad():
         detections = object_detection_model([img_tensor])[0]
     threshold = 0.8
     detected_boxes = detections["boxes"][detections["scores"] > threshold]
     for box in detected_boxes:
@@ -94,26 +97,33 @@ def analyze_faces(frame_rgb, output_frame):
 # Main Analysis Function
 # -----------------------------
-def analyze_webcam(frame):
     """
-    Runs posture analysis, facial emotion analysis, object detection, and face detection
-    on the given webcam frame. Returns an annotated image and a textual summary.
     """
-    if frame is None:
-        return None, "No frame provided."
-    # The input frame is in BGR (as from OpenCV). Create a copy for drawing.
     output_frame = frame.copy()
-    # Convert frame to RGB for analysis
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     # Run analyses
     posture_result = analyze_posture(frame_rgb, output_frame)
     emotion_result = analyze_emotion(frame)
     object_result = analyze_objects(frame_rgb, output_frame)
     face_result = analyze_faces(frame_rgb, output_frame)
     # Compose the result summary text
     summary = (
         f"Posture Analysis: {posture_result}\n"
@@ -121,25 +131,25 @@ def analyze_webcam(frame):
         f"Object Detection: {object_result}\n"
         f"Face Detection: {face_result}"
     )
-    # Optionally, overlay some of the summary text on the image
     cv2.putText(output_frame, f"Emotion: {emotion_result}", (10, 30),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
     cv2.putText(output_frame, f"Objects: {object_result}", (10, 70),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
     cv2.putText(output_frame, f"Faces: {face_result}", (10, 110),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
     return output_frame, summary
 # -----------------------------
 # Gradio Interface Setup
 # -----------------------------
-# We output both an image (with drawn annotations) and a text summary.
 interface = gr.Interface(
     fn=analyze_webcam,
-    inputs=gr.Image(source="webcam", streaming=True, label="Webcam Feed"),
     outputs=[
         gr.Image(type="numpy", label="Annotated Output"),
         gr.Textbox(label="Analysis Summary")

 import numpy as np
 import torch
 from torchvision import models, transforms
+from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
 from PIL import Image
 import mediapipe as mp
 from fer import FER  # Facial emotion recognition
 face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
 # Object Detection Model: Faster R-CNN (pretrained on COCO)
+object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
+    weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
+)
 object_detection_model.eval()
 obj_transform = transforms.Compose([transforms.ToTensor()])
     posture_text = "No posture detected"
     if pose_results.pose_landmarks:
         posture_text = "Posture detected"
+        # Draw the pose landmarks on the output image
         mp_drawing.draw_landmarks(
             output_frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
             mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
     img_tensor = obj_transform(image_pil)
     with torch.no_grad():
         detections = object_detection_model([img_tensor])[0]
     threshold = 0.8
     detected_boxes = detections["boxes"][detections["scores"] > threshold]
     for box in detected_boxes:
 # Main Analysis Function
 # -----------------------------
+def analyze_webcam(video_path):
     """
+    Receives a video file from the webcam, extracts one frame,
+    then runs posture analysis, facial emotion detection, object detection,
+    and face detection on that frame.
+    Returns an annotated image and a textual summary.
     """
+    # Open the video file (the webcam stream is saved as a temporary file)
+    cap = cv2.VideoCapture(video_path)
+    success, frame = cap.read()
+    cap.release()
+    if not success:
+        return None, "Could not read a frame from the video."
+    # Create a copy for drawing annotations
     output_frame = frame.copy()
+    # Convert frame to RGB for certain analyses
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     # Run analyses
     posture_result = analyze_posture(frame_rgb, output_frame)
     emotion_result = analyze_emotion(frame)
     object_result = analyze_objects(frame_rgb, output_frame)
     face_result = analyze_faces(frame_rgb, output_frame)
     # Compose the result summary text
     summary = (
         f"Posture Analysis: {posture_result}\n"
         f"Object Detection: {object_result}\n"
         f"Face Detection: {face_result}"
     )
+    # Optionally, overlay some summary text on the image
     cv2.putText(output_frame, f"Emotion: {emotion_result}", (10, 30),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
     cv2.putText(output_frame, f"Objects: {object_result}", (10, 70),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
     cv2.putText(output_frame, f"Faces: {face_result}", (10, 110),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
     return output_frame, summary
 # -----------------------------
 # Gradio Interface Setup
 # -----------------------------
+# Using gr.Video to capture webcam input in Gradio 5.x
 interface = gr.Interface(
     fn=analyze_webcam,
+    inputs=gr.Video(source="webcam", streaming=True, label="Webcam Feed"),
     outputs=[
         gr.Image(type="numpy", label="Annotated Output"),
         gr.Textbox(label="Analysis Summary")