eusholli committed
Commit 143a483 · 1 Parent(s): 00a76be

initial commit

Files changed (3)
  1. .gitignore +4 -1
  2. README.md +3 -3
  3. app.py +178 -45
.gitignore CHANGED
@@ -125,4 +125,7 @@ models/*
 !models/.gitkeep
 
 # All cached movie files
-*.mp4
+*.mp4
+
+# All cached model files
+*.pt
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: YOLO v8 Playground
+title: Threat Detection
 emoji: 🦀
 colorFrom: indigo
 colorTo: blue
@@ -10,11 +10,11 @@ pinned: false
 license: mit
 ---
 
-# Yolo v8 Playground
+# Threat Detection
 
 Based on https://huggingface.co/spaces/eusholli/computer-vision-playground.
 
-This Streamlit application uses the Computer Vision Playground as its base and has modified the analyze_frame function, replacing the example face/sentiment detection with object detection.
+This Streamlit application uses the Computer Vision Playground as its base and has modified the analyze_frame function, replacing the example face/sentiment detection with pose detection.
 
 To learn how to do the same yourself and start playing with computer vision models read [here](https://huggingface.co/spaces/eusholli/computer-vision-playground/blob/main/README.md).
 
 
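The retitled README describes replacing the example face/sentiment analysis with YOLOv8 pose detection; the app.py diff below carries the full implementation. A minimal standalone sketch of the core idea, not taken from the commit itself (it assumes the ultralytics package is installed; "frame.jpg" is a placeholder image path):

```python
# Minimal sketch: load the YOLOv8 pose checkpoint and read back detections
# and keypoints, mirroring what the new analyze_frame does per video frame.
from ultralytics import YOLO

pose_model = YOLO("yolov8n-pose.pt")   # pose checkpoint, downloaded on first use

results = pose_model("frame.jpg")      # placeholder path; an np.ndarray frame also works
for i, box in enumerate(results[0].boxes):
    label = pose_model.names[int(box.cls)]                   # "person" for the pose model
    score = float(box.conf)
    keypoints = results[0].keypoints[i].data.cpu().numpy()   # shape (1, 17, 3): x, y, conf
    print(label, round(score, 2), keypoints.shape)
```

The commit goes further: it derives a coarse action label from the keypoints (detect_action) and draws both the detection label and the action onto the frame, as the diff below shows.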
app.py CHANGED
@@ -13,84 +13,217 @@ from streamlit_webrtc import WebRtcMode, webrtc_streamer
 from utils.download import download_file
 from utils.turn import get_ice_servers
 
-from PIL import Image, ImageDraw  # Import PIL for image processing
-from transformers import pipeline  # Import Hugging Face transformers pipeline
-
+from PIL import Image
 import requests
-from io import BytesIO  # Import for handling byte streams
-
+from io import BytesIO
 
 # CHANGE CODE BELOW HERE, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
 # Update below string to set display title of analysis
 
-# Default title - "Facial Sentiment Analysis"
-
-ANALYSIS_TITLE = "YOLO-8 Object Detection Analysis"
+ANALYSIS_TITLE = "YOLO-8 Pose and Efficient Action Detection"
 
-# Load the YOLOv8 model
-model = YOLO("yolov8n.pt")
+# Load the YOLOv8 model for pose estimation
+pose_model = YOLO("yolov8n-pose.pt")
 
 
-# CHANGE THE CONTENTS OF THIS FUNCTION, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
-#
-
-# Set analysis results in img_container and result queue for display
-# img_container["input"] - holds the input frame contents - of type np.ndarray
-# img_container["analyzed"] - holds the analyzed frame with any added annotations - of type np.ndarray
-# img_container["analysis_time"] - holds how long the analysis has taken in miliseconds
-# result_queue - holds the analysis metadata results - of type dictionary
+def detect_action(keypoints, prev_keypoints=None):
+    keypoint_dict = {
+        0: "Nose", 1: "Left Eye", 2: "Right Eye", 3: "Left Ear", 4: "Right Ear",
+        5: "Left Shoulder", 6: "Right Shoulder", 7: "Left Elbow", 8: "Right Elbow",
+        9: "Left Wrist", 10: "Right Wrist", 11: "Left Hip", 12: "Right Hip",
+        13: "Left Knee", 14: "Right Knee", 15: "Left Ankle", 16: "Right Ankle"
+    }
+
+    confidence_threshold = 0.5
+    movement_threshold = 0.05
+
+    def get_keypoint(idx):
+        if idx < len(keypoints[0]):
+            x, y, conf = keypoints[0][idx]
+            return np.array([x, y]) if conf > confidence_threshold else None
+        return None
+
+    def calculate_angle(a, b, c):
+        if a is None or b is None or c is None:
+            return None
+        ba = a - b
+        bc = c - b
+        cosine_angle = np.dot(ba, bc) / \
+            (np.linalg.norm(ba) * np.linalg.norm(bc))
+        angle = np.arccos(cosine_angle)
+        return np.degrees(angle)
+
+    def calculate_movement(current, previous):
+        if current is None or previous is None:
+            return None
+        return np.linalg.norm(current - previous)
+
+    nose = get_keypoint(0)
+    left_shoulder = get_keypoint(5)
+    right_shoulder = get_keypoint(6)
+    left_elbow = get_keypoint(7)
+    right_elbow = get_keypoint(8)
+    left_wrist = get_keypoint(9)
+    right_wrist = get_keypoint(10)
+    left_hip = get_keypoint(11)
+    right_hip = get_keypoint(12)
+    left_knee = get_keypoint(13)
+    right_knee = get_keypoint(14)
+    left_ankle = get_keypoint(15)
+    right_ankle = get_keypoint(16)
+
+    if all(kp is None for kp in [nose, left_shoulder, right_shoulder, left_hip, right_hip, left_ankle, right_ankle]):
+        return "waiting"
+
+    # Calculate midpoints
+    shoulder_midpoint = (left_shoulder + right_shoulder) / \
+        2 if left_shoulder is not None and right_shoulder is not None else None
+    hip_midpoint = (left_hip + right_hip) / \
+        2 if left_hip is not None and right_hip is not None else None
+    ankle_midpoint = (left_ankle + right_ankle) / \
+        2 if left_ankle is not None and right_ankle is not None else None
+
+    # Calculate angles
+    spine_angle = calculate_angle(
+        shoulder_midpoint, hip_midpoint, ankle_midpoint)
+    left_arm_angle = calculate_angle(left_shoulder, left_elbow, left_wrist)
+    right_arm_angle = calculate_angle(right_shoulder, right_elbow, right_wrist)
+    left_leg_angle = calculate_angle(left_hip, left_knee, left_ankle)
+    right_leg_angle = calculate_angle(right_hip, right_knee, right_ankle)
+
+    # Calculate movement
+    movement = None
+    if prev_keypoints is not None:
+        prev_ankle_midpoint = ((prev_keypoints[0][15][:2] + prev_keypoints[0][16][:2]) / 2
+                               if len(prev_keypoints[0]) > 16 else None)
+        movement = calculate_movement(ankle_midpoint, prev_ankle_midpoint)
+
+    # Detect actions
+    if spine_angle is not None:
+        if spine_angle > 160:
+            if movement is not None and movement > movement_threshold:
+                if movement > movement_threshold * 3:
+                    return "running"
+                else:
+                    return "walking"
+            return "standing"
+        elif 70 < spine_angle < 110:
+            return "sitting"
+        elif spine_angle < 30:
+            return "lying"
+
+    # Detect pointing
+    if (left_arm_angle is not None and left_arm_angle > 150) or (right_arm_angle is not None and right_arm_angle > 150):
+        return "pointing"
+
+    # Detect kicking
+    if (left_leg_angle is not None and left_leg_angle > 120) or (right_leg_angle is not None and right_leg_angle > 120):
+        return "kicking"
+
+    # Detect hitting
+    if ((left_arm_angle is not None and 80 < left_arm_angle < 120) or
+            (right_arm_angle is not None and 80 < right_arm_angle < 120)):
+        if movement is not None and movement > movement_threshold * 2:
+            return "hitting"
+
+    return "waiting"
 
 
 def analyze_frame(frame: np.ndarray):
-    start_time = time.time()  # Start timing the analysis
-    img_container["input"] = frame  # Store the input frame
-    frame = frame.copy()  # Create a copy of the frame to modify
+    start_time = time.time()
+    img_container["input"] = frame
+    frame = frame.copy()
 
-    # Run YOLOv8 tracking on the frame, persisting tracks between frames
-    results = model.track(frame, persist=True)
+    # Run YOLOv8 pose estimation on the frame
+    pose_results = pose_model(frame)
 
-    # Initialize a list to store Detection objects
     detections = []
-    object_counter = 1
 
-    # Iterate over the detected boxes
-    for box in results[0].boxes:
-        detection = {}
-        # Extract class id, label, score, and bounding box coordinates
+    for i, box in enumerate(pose_results[0].boxes):
         class_id = int(box.cls)
-
-        detection["id"] = object_counter
-        detection["label"] = model.names[class_id]
-        detection["score"] = float(box.conf)
-        detection["box_coords"] = [round(value.item(), 2)
-                                   for value in box.xyxy.flatten()]
+        detection = {
+            "label": pose_model.names[class_id],
+            "score": float(box.conf),
+            "box_coords": [round(value.item(), 2) for value in box.xyxy.flatten()]
+        }
+
+        # Get keypoints for this detection if available
+        try:
+            if pose_results[0].keypoints is not None:
+                keypoints = pose_results[0].keypoints[i].data.cpu().numpy()
+
+                # Detect action using the keypoints
+                prev_keypoints = img_container.get("prev_keypoints")
+                action = detect_action(keypoints, prev_keypoints)
+                detection["action"] = action
+
+                # Store current keypoints for next frame
+                img_container["prev_keypoints"] = keypoints
+            else:
+                detection["action"] = "No keypoint data"
+        except IndexError:
+            detection["action"] = "Action detection failed"
 
         detections.append(detection)
-        object_counter += 1
 
-    # Visualize the results on the frame
-    frame = results[0].plot()
+    # Draw pose keypoints without bounding boxes
+    frame = pose_results[0].plot(boxes=False, labels=False, kpt_line=True)
+
+    for detection in detections:
+        label = f"{detection['label']} {detection['score']:.2f}"
+        action = detection['action']
+
+        # Get bounding box coordinates
+        x1, y1, x2, y2 = detection["box_coords"]
 
-    end_time = time.time()  # End timing the analysis
-    execution_time_ms = round(
-        (end_time - start_time) * 1000, 2
-    )  # Calculate execution time in milliseconds
-    # Store the execution time
+        # Increase font size and thickness
+        font_scale = 0.7
+        thickness = 2
+
+        # Get text size for label and action
+        (label_width, label_height), _ = cv2.getTextSize(
+            label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
+        (action_width, action_height), _ = cv2.getTextSize(
+            action, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
+
+        # Calculate positions for centered labels at the top of the box
+        label_x = int((x1 + x2) / 2)
+        label_y = int(y1) - 10  # 10 pixels above the top of the box
+        action_y = label_y - label_height - 10  # 10 pixels above the label
+
+        # Draw yellow background for label
+        cv2.rectangle(frame, (label_x - label_width // 2 - 5, label_y - label_height - 5),
+                      (label_x + label_width // 2 + 5, label_y + 5), (0, 255, 255), -1)
+
+        # Draw yellow background for action
+        cv2.rectangle(frame, (label_x - action_width // 2 - 5, action_y - action_height - 5),
+                      (label_x + action_width // 2 + 5, action_y + 5), (0, 255, 255), -1)
+
+        # Draw black text for label
+        cv2.putText(frame, label, (label_x - label_width // 2, label_y),
+                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
+
+        # Draw black text for action
+        cv2.putText(frame, action, (label_x - action_width // 2, action_y),
+                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
+
+    end_time = time.time()
+    execution_time_ms = round((end_time - start_time) * 1000, 2)
     img_container["analysis_time"] = execution_time_ms
 
-    # store the detections
     img_container["detections"] = detections
-    img_container["analyzed"] = frame  # Store the analyzed frame
-
-    return  # End of the function
+    img_container["analyzed"] = frame
 
+    return
 
+#
 #
 #
 # DO NOT TOUCH THE BELOW CODE (NOT NEEDED)
 #
 #
 
+
 # Suppress FFmpeg logs
 os.environ["FFMPEG_LOG_LEVEL"] = "quiet"
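For reference, the new detect_action expects keypoints in the same (1, 17, 3) layout the YOLOv8 pose results return (an x, y, confidence triple per joint), and prev_keypoints may be None on the first frame. A hypothetical smoke test, not part of this commit, assuming detect_action from the new app.py is in scope:

```python
import numpy as np

# Build a synthetic, fully upright skeleton: every joint marked confident
# (conf = 1.0) and all joints on one vertical line, so the spine angle is ~180°.
keypoints = np.zeros((1, 17, 3))
keypoints[..., 2] = 1.0
ys = {0: 0.0, 5: 1.0, 6: 1.0, 7: 1.5, 8: 1.5, 9: 2.0, 10: 2.0,
      11: 2.0, 12: 2.0, 13: 2.5, 14: 2.5, 15: 3.0, 16: 3.0}
for idx, y in ys.items():
    keypoints[0, idx, 1] = y

# With no previous frame there is no measured movement, so the spine-angle
# branch should classify this pose as "standing".
print(detect_action(keypoints))
```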