David Driscoll committed
Commit f6a647b · Parent: 4a53aae

Video to image, text change

Files changed (1):
  1. app.py (+68, -48)
app.py CHANGED
@@ -11,13 +11,13 @@ from fer import FER  # Facial emotion recognition
 # -----------------------------
 # Configuration
 # -----------------------------
-# 1) Increase skip rate
-SKIP_RATE = 15
 
-# 2) Use GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# 3) Desired input size for faster inference
 DESIRED_SIZE = (640, 480)
 
 # -----------------------------
@@ -45,16 +45,16 @@ object_detection_model.eval().to(device)  # Move model to GPU (if available)
 
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
-# If the FER library supports GPU, it may pick it up automatically.
-# Some versions allow device specification, e.g. FER(mtcnn=True, device=device).
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
 # Overlay Drawing Functions
 # -----------------------------
 def draw_posture_overlay(raw_frame, landmarks):
     for (x, y) in landmarks:
-        cv2.circle(raw_frame, (x, y), 4, (0, 255, 0), -1)
     return raw_frame
 
 def draw_boxes_overlay(raw_frame, boxes, color):
@@ -66,22 +66,18 @@ def draw_boxes_overlay(raw_frame, boxes, color):
 # Heavy (Synchronous) Detection Functions
 # -----------------------------
 def compute_posture_overlay(image):
-    # Convert to BGR for MediaPipe
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
-
-    # 2) Downscale before processing (optional for posture)
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
     pose_results = pose.process(frame_rgb_small)
 
-    # Scale landmarks back up to original size if needed
     if pose_results.pose_landmarks:
         landmarks = []
         for lm in pose_results.pose_landmarks.landmark:
-            # Rescale from the smaller frame to the original size
             x = int(lm.x * small_w * (w / small_w))
             y = int(lm.y * small_h * (h / small_h))
             landmarks.append((x, y))
@@ -93,9 +89,7 @@ def compute_posture_overlay(image):
     return landmarks, text
 
 def compute_emotion_overlay(image):
-    # Convert to BGR
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
@@ -109,7 +103,6 @@ def compute_emotion_overlay(image):
 
 def compute_objects_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
@@ -123,17 +116,13 @@
     boxes = []
     for box, score in zip(detections["boxes"], detections["scores"]):
         if score > threshold:
-            # box is in the scaled-down coordinates;
-            # you may want to scale them back to the original if needed
             boxes.append(tuple(box.int().cpu().numpy()))
-
     text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
     return boxes, text
 
 def compute_faces_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
-    # 2) Downscale
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
@@ -148,8 +137,6 @@ def compute_faces_overlay(image):
             y = int(bbox.ymin * small_h)
             box_w = int(bbox.width * small_w)
             box_h = int(bbox.height * small_h)
-            # Scale bounding box coords back to original if you need full resolution
-            # E.g., x_original = int(x * (w / small_w)), etc.
             boxes.append((x, y, x + box_w, y + box_h))
         text = f"Detected {len(boxes)} face(s)"
     else:
@@ -157,13 +144,12 @@
     return boxes, text
 
 # -----------------------------
-# Main Analysis Functions
 # -----------------------------
 def analyze_posture_current(image):
     global posture_cache
     posture_cache["counter"] += 1
     current_frame = np.array(image)
-
     if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
         landmarks, text = compute_posture_overlay(image)
         posture_cache["landmarks"] = landmarks
@@ -173,24 +159,22 @@
     if posture_cache["landmarks"]:
         output = draw_posture_overlay(output, posture_cache["landmarks"])
 
-    return output, f"Posture Analysis: {posture_cache['text']}"
 
 def analyze_emotion_current(image):
     global emotion_cache
     emotion_cache["counter"] += 1
     current_frame = np.array(image)
-
     if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
         text = compute_emotion_overlay(image)
         emotion_cache["text"] = text
 
-    return current_frame, f"Emotion Analysis: {emotion_cache['text']}"
 
 def analyze_objects_current(image):
     global objects_cache
     objects_cache["counter"] += 1
     current_frame = np.array(image)
-
     if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
         boxes, text = compute_objects_overlay(image)
         objects_cache["boxes"] = boxes
@@ -199,14 +183,12 @@ def analyze_objects_current(image):
     output = current_frame.copy()
     if objects_cache["boxes"]:
         output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
-
-    return output, f"Object Detection: {objects_cache['text']}"
 
 def analyze_faces_current(image):
     global faces_cache
     faces_cache["counter"] += 1
     current_frame = np.array(image)
-
     if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
         boxes, text = compute_faces_overlay(image)
         faces_cache["boxes"] = boxes
@@ -215,8 +197,38 @@
     output = current_frame.copy()
     if faces_cache["boxes"]:
         output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
-
-    return output, f"Face Detection: {faces_cache['text']}"
 
 # -----------------------------
 # Custom CSS
@@ -252,30 +264,30 @@ body {
 """
 
 # -----------------------------
-# Create Individual Interfaces
 # -----------------------------
 posture_interface = gr.Interface(
     fn=analyze_posture_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Posture"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
     title="Posture Analysis",
     description="Detects your posture using MediaPipe.",
-    live=True  # Keep only this interface live to avoid multiple heavy computations
 )
 
 emotion_interface = gr.Interface(
     fn=analyze_emotion_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
     title="Emotion Analysis",
     description="Detects facial emotions using FER.",
-    live=False  # Turn off streaming to reduce overhead
 )
 
 objects_interface = gr.Interface(
     fn=analyze_objects_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture the Scene"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
     title="Object Detection",
     description="Detects objects using a pretrained Faster R-CNN.",
     live=False
@@ -283,19 +295,28 @@ objects_interface = gr.Interface(
 
 faces_interface = gr.Interface(
     fn=analyze_faces_current,
-    inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
-    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
     title="Face Detection",
     description="Detects faces using MediaPipe.",
     live=False
 )
 
 # -----------------------------
 # Create a Tabbed Interface
 # -----------------------------
 tabbed_interface = gr.TabbedInterface(
-    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface],
-    tab_names=["Posture", "Emotion", "Objects", "Faces"]
 )
 
 # -----------------------------
@@ -303,10 +324,9 @@ tabbed_interface = gr.TabbedInterface(
 # -----------------------------
 demo = gr.Blocks(css=custom_css)
 with demo:
-    gr.Markdown("<h1 class='gradio-title'>Real-Time Multi-Analysis App</h1>")
     gr.Markdown(
-        "<p class='gradio-description'>Experience a high-tech cinematic interface for real-time "
-        "analysis of your posture, emotions, objects, and faces using your webcam.</p>"
     )
     tabbed_interface.render()
 
 
 # -----------------------------
 # Configuration
 # -----------------------------
+# For image processing, always run the analysis (no frame skipping)
+SKIP_RATE = 1
 
+# Use GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+# Desired input size for faster inference
 DESIRED_SIZE = (640, 480)
 
 # -----------------------------
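Note on the new SKIP_RATE value: with SKIP_RATE = 1, the counter % SKIP_RATE == 0 test in the analyze_* functions further down is true on every call, so each uploaded image is analyzed from scratch and the per-function caches only hold the most recent result. A minimal sketch of that caching pattern, with illustrative names that are not taken from app.py:

    import numpy as np

    cache = {"counter": 0, "result": None}
    SKIP_RATE = 1  # value set by this commit; the previous code used 15 to skip webcam frames

    def heavy_detection(image):
        # Stand-in for a real detector call (MediaPipe, FER, or Faster R-CNN).
        return f"analyzed frame of shape {np.asarray(image).shape}"

    def maybe_recompute(image):
        cache["counter"] += 1
        # With SKIP_RATE = 1 this condition always holds, so nothing is ever skipped.
        if cache["counter"] % SKIP_RATE == 0 or cache["result"] is None:
            cache["result"] = heavy_detection(image)
        return cache["result"]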
 
 
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
+# Initialize the FER emotion detector
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
 # Overlay Drawing Functions
 # -----------------------------
 def draw_posture_overlay(raw_frame, landmarks):
+    # Draw circles for each landmark using lime green (BGR: (50,205,50))
     for (x, y) in landmarks:
+        cv2.circle(raw_frame, (x, y), 4, (50, 205, 50), -1)
     return raw_frame
 
 def draw_boxes_overlay(raw_frame, boxes, color):
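The body of draw_boxes_overlay is unchanged and therefore not shown in the diff; judging from how it is called, it presumably draws one rectangle per (x1, y1, x2, y2) box, roughly along these lines (a hypothetical reconstruction, not the committed code):

    def draw_boxes_overlay(raw_frame, boxes, color):
        # Draw each box onto the frame in the given color with a 2 px border.
        for (x1, y1, x2, y2) in boxes:
            cv2.rectangle(raw_frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        return raw_frame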
 
 # Heavy (Synchronous) Detection Functions
 # -----------------------------
 def compute_posture_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
     pose_results = pose.process(frame_rgb_small)
 
     if pose_results.pose_landmarks:
         landmarks = []
         for lm in pose_results.pose_landmarks.landmark:
+            # Scale landmarks back to the original image size
             x = int(lm.x * small_w * (w / small_w))
             y = int(lm.y * small_h * (h / small_h))
             landmarks.append((x, y))
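Since MediaPipe landmark coordinates are normalized to [0, 1], the small_w and small_h factors cancel, and the two scaling lines reduce to int(lm.x * w) and int(lm.y * h); the resize has no effect on the recovered coordinates. A quick check of the arithmetic with illustrative values:

    w, h = 1280, 720              # original frame size (illustrative)
    small_w, small_h = 640, 480   # DESIRED_SIZE
    lm_x, lm_y = 0.25, 0.5        # normalized landmark coordinates (illustrative)
    assert int(lm_x * small_w * (w / small_w)) == int(lm_x * w) == 320
    assert int(lm_y * small_h * (h / small_h)) == int(lm_y * h) == 360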
 
     return landmarks, text
 
 def compute_emotion_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
 
 
 def compute_objects_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
 
 
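The unchanged middle of compute_objects_overlay is not shown in the diff; that is where the detector actually runs. With torchvision's detection API the missing step presumably looks something like the sketch below, reusing app.py's obj_transform, device, and object_detection_model; the 0.8 threshold is an assumption, not a value visible in the diff:

    img_tensor = obj_transform(frame_rgb_small).to(device)    # HWC uint8 array -> CHW float tensor in [0, 1]
    with torch.no_grad():
        detections = object_detection_model([img_tensor])[0]  # dict with "boxes", "labels", "scores"
    threshold = 0.8                                            # assumed confidence cutoff used below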
     boxes = []
     for box, score in zip(detections["boxes"], detections["scores"]):
         if score > threshold:
             boxes.append(tuple(box.int().cpu().numpy()))
     text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
     return boxes, text
 
 def compute_faces_overlay(image):
     frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     h, w, _ = frame_bgr.shape
     frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
     small_h, small_w, _ = frame_bgr_small.shape
 
 
             y = int(bbox.ymin * small_h)
             box_w = int(bbox.width * small_w)
             box_h = int(bbox.height * small_h)
             boxes.append((x, y, x + box_w, y + box_h))
         text = f"Detected {len(boxes)} face(s)"
     else:
 
     return boxes, text
 
 # -----------------------------
+# Main Analysis Functions for Single Image
 # -----------------------------
 def analyze_posture_current(image):
     global posture_cache
     posture_cache["counter"] += 1
     current_frame = np.array(image)
     if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
         landmarks, text = compute_posture_overlay(image)
         posture_cache["landmarks"] = landmarks
 
     if posture_cache["landmarks"]:
         output = draw_posture_overlay(output, posture_cache["landmarks"])
 
+    return output, f"<div style='color: lime;'>Posture Analysis: {posture_cache['text']}</div>"
 
 def analyze_emotion_current(image):
     global emotion_cache
     emotion_cache["counter"] += 1
     current_frame = np.array(image)
     if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
         text = compute_emotion_overlay(image)
         emotion_cache["text"] = text
 
+    return current_frame, f"<div style='color: lime;'>Emotion Analysis: {emotion_cache['text']}</div>"
 
 def analyze_objects_current(image):
     global objects_cache
     objects_cache["counter"] += 1
     current_frame = np.array(image)
     if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
         boxes, text = compute_objects_overlay(image)
         objects_cache["boxes"] = boxes
 
     output = current_frame.copy()
     if objects_cache["boxes"]:
         output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
+    return output, f"<div style='color: lime;'>Object Detection: {objects_cache['text']}</div>"
 
 def analyze_faces_current(image):
     global faces_cache
     faces_cache["counter"] += 1
     current_frame = np.array(image)
     if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
         boxes, text = compute_faces_overlay(image)
         faces_cache["boxes"] = boxes
 
     output = current_frame.copy()
     if faces_cache["boxes"]:
         output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
+    return output, f"<div style='color: lime;'>Face Detection: {faces_cache['text']}</div>"
+
+def analyze_all(image):
+    # Run all analyses on the same image
+    current_frame = np.array(image).copy()
+
+    # Posture Analysis
+    landmarks, posture_text = compute_posture_overlay(image)
+    if landmarks:
+        current_frame = draw_posture_overlay(current_frame, landmarks)
+
+    # Emotion Analysis
+    emotion_text = compute_emotion_overlay(image)
+
+    # Object Detection
+    boxes_obj, objects_text = compute_objects_overlay(image)
+    if boxes_obj:
+        current_frame = draw_boxes_overlay(current_frame, boxes_obj, (255, 255, 0))
+
+    # Face Detection
+    boxes_face, faces_text = compute_faces_overlay(image)
+    if boxes_face:
+        current_frame = draw_boxes_overlay(current_frame, boxes_face, (0, 0, 255))
+
+    combined_text = (
+        f"Posture Analysis: {posture_text}<br>"
+        f"Emotion Analysis: {emotion_text}<br>"
+        f"Object Detection: {objects_text}<br>"
+        f"Face Detection: {faces_text}"
+    )
+    combined_text_html = f"<div style='color: lime;'>{combined_text}</div>"
+    return current_frame, combined_text_html
 
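A quick way to exercise the new analyze_all function outside the Gradio UI, for example in a Python session that has already executed app.py (the file names below are placeholders):

    from PIL import Image
    import cv2

    img = Image.open("sample.jpg").convert("RGB")   # placeholder input image
    annotated, summary_html = analyze_all(img)      # RGB numpy array plus an HTML summary string
    cv2.imwrite("annotated.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
    print(summary_html)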
 # -----------------------------
 # Custom CSS
 
 """
 
 # -----------------------------
+# Create Individual Interfaces for Image Processing
 # -----------------------------
 posture_interface = gr.Interface(
     fn=analyze_posture_current,
+    inputs=gr.Image(label="Upload an Image for Posture Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Posture Analysis")],
     title="Posture Analysis",
     description="Detects your posture using MediaPipe.",
+    live=False
 )
 
 emotion_interface = gr.Interface(
     fn=analyze_emotion_current,
+    inputs=gr.Image(label="Upload an Image for Emotion Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Emotion Analysis")],
     title="Emotion Analysis",
     description="Detects facial emotions using FER.",
+    live=False
 )
 
 objects_interface = gr.Interface(
     fn=analyze_objects_current,
+    inputs=gr.Image(label="Upload an Image for Object Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Object Detection")],
     title="Object Detection",
     description="Detects objects using a pretrained Faster R-CNN.",
     live=False
 
 faces_interface = gr.Interface(
     fn=analyze_faces_current,
+    inputs=gr.Image(label="Upload an Image for Face Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Face Detection")],
     title="Face Detection",
     description="Detects faces using MediaPipe.",
     live=False
 )
 
+all_interface = gr.Interface(
+    fn=analyze_all,
+    inputs=gr.Image(label="Upload an Image for All Inferences"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Combined Analysis")],
+    title="All Inferences",
+    description="Runs posture, emotion, object, and face detection all at once.",
+    live=False
+)
+
 # -----------------------------
 # Create a Tabbed Interface
 # -----------------------------
 tabbed_interface = gr.TabbedInterface(
+    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface, all_interface],
+    tab_names=["Posture", "Emotion", "Objects", "Faces", "All Inferences"]
 )
 
 # -----------------------------
 
 # -----------------------------
 demo = gr.Blocks(css=custom_css)
 with demo:
+    gr.Markdown("<h1 class='gradio-title'>Multi-Analysis Image App</h1>")
     gr.Markdown(
+        "<p class='gradio-description'>Upload an image to run analysis for posture, emotions, objects, and faces.</p>"
     )
     tabbed_interface.render()
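The diff ends just after tabbed_interface.render(), so the launch call is not visible; for the Space to serve the tabbed interface, app.py presumably ends with something like:

    demo.launch()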