eusholli committed on
Commit 9a91192 · 1 Parent(s): 143a483

Added choice of object and/or pose detection

Files changed (1)
  1. app.py +129 -82
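The change gates each YOLOv8 model behind a Streamlit radio button. A minimal sketch of that pattern, assuming only what the diff shows: the model file names and radio options are taken from it, while choice and run_models are illustrative names rather than identifiers from app.py.

import streamlit as st
from ultralytics import YOLO

# Load both models once; the radio decides which of them run per frame.
pose_model = YOLO("yolov8n-pose.pt")   # pose estimation weights
object_model = YOLO("yolov8n.pt")      # object detection weights

choice = st.radio(
    "Choose Detection Type",
    ("Object Detection", "Pose Estimation", "Both"),
    index=2,  # default to "Both", as in the diff
)

def run_models(frame):
    # Run only the model(s) the user selected and return their Results.
    results = {}
    if choice in ("Object Detection", "Both"):
        results["objects"] = object_model(frame)[0]
    if choice in ("Pose Estimation", "Both"):
        results["pose"] = pose_model(frame)[0]
    return results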
app.py CHANGED
@@ -20,10 +20,11 @@ from io import BytesIO
20
  # CHANGE CODE BELOW HERE, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
21
  # Update below string to set display title of analysis
22
 
23
- ANALYSIS_TITLE = "YOLO-8 Pose and Efficient Action Detection"
24
 
25
- # Load the YOLOv8 model for pose estimation
26
  pose_model = YOLO("yolov8n-pose.pt")
 
27
 
28
 
29
  def detect_action(keypoints, prev_keypoints=None):
@@ -134,78 +135,103 @@ def analyze_frame(frame: np.ndarray):
134
  img_container["input"] = frame
135
  frame = frame.copy()
136
 
137
- # Run YOLOv8 pose estimation on the frame
138
- pose_results = pose_model(frame)
139
-
140
  detections = []
141
 
142
- for i, box in enumerate(pose_results[0].boxes):
143
- class_id = int(box.cls)
144
- detection = {
145
- "label": pose_model.names[class_id],
146
- "score": float(box.conf),
147
- "box_coords": [round(value.item(), 2) for value in box.xyxy.flatten()]
148
- }
149
-
150
- # Get keypoints for this detection if available
151
- try:
152
- if pose_results[0].keypoints is not None:
153
- keypoints = pose_results[0].keypoints[i].data.cpu().numpy()
154
-
155
- # Detect action using the keypoints
156
- prev_keypoints = img_container.get("prev_keypoints")
157
- action = detect_action(keypoints, prev_keypoints)
158
- detection["action"] = action
159
-
160
- # Store current keypoints for next frame
161
- img_container["prev_keypoints"] = keypoints
162
- else:
163
- detection["action"] = "No keypoint data"
164
- except IndexError:
165
- detection["action"] = "Action detection failed"
166
-
167
- detections.append(detection)
168
-
169
- # Draw pose keypoints without bounding boxes
170
- frame = pose_results[0].plot(boxes=False, labels=False, kpt_line=True)
171
-
172
- for detection in detections:
173
- label = f"{detection['label']} {detection['score']:.2f}"
174
- action = detection['action']
175
-
176
- # Get bounding box coordinates
177
- x1, y1, x2, y2 = detection["box_coords"]
178
-
179
- # Increase font size and thickness
180
- font_scale = 0.7
181
- thickness = 2
182
-
183
- # Get text size for label and action
184
- (label_width, label_height), _ = cv2.getTextSize(
185
- label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
186
- (action_width, action_height), _ = cv2.getTextSize(
187
- action, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
188
-
189
- # Calculate positions for centered labels at the top of the box
190
- label_x = int((x1 + x2) / 2)
191
- label_y = int(y1) - 10 # 10 pixels above the top of the box
192
- action_y = label_y - label_height - 10 # 10 pixels above the label
193
-
194
- # Draw yellow background for label
195
- cv2.rectangle(frame, (label_x - label_width // 2 - 5, label_y - label_height - 5),
196
- (label_x + label_width // 2 + 5, label_y + 5), (0, 255, 255), -1)
197
-
198
- # Draw yellow background for action
199
- cv2.rectangle(frame, (label_x - action_width // 2 - 5, action_y - action_height - 5),
200
- (label_x + action_width // 2 + 5, action_y + 5), (0, 255, 255), -1)
201
-
202
- # Draw black text for label
203
- cv2.putText(frame, label, (label_x - label_width // 2, label_y),
204
- cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
205
-
206
- # Draw black text for action
207
- cv2.putText(frame, action, (label_x - action_width // 2, action_y),
208
- cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
209
 
210
  end_time = time.time()
211
  execution_time_ms = round((end_time - start_time) * 1000, 2)
@@ -328,6 +354,7 @@ with col1:
328
  # Text input for YouTube URL
329
  st.subheader("Enter a YouTube URL")
330
  youtube_url = st.text_input("YouTube URL")
 
331
 
332
  # File uploader for videos
333
  st.subheader("Upload a Video")
@@ -355,7 +382,9 @@ st.markdown(
355
 
356
 
357
  def analysis_init():
358
- global analysis_time, show_labels, labels_placeholder, input_subheader, input_placeholder, output_placeholder
359
 
360
  with col2:
361
  st.header("Analysis")
@@ -364,9 +393,11 @@ def analysis_init():
364
  st.subheader("Output Frame")
365
  output_placeholder = st.empty() # Placeholder for output frame
366
  analysis_time = st.empty() # Placeholder for analysis time
367
- show_labels = st.checkbox(
368
- "Show the detected labels", value=True
369
- ) # Checkbox to show/hide labels
370
  labels_placeholder = st.empty() # Placeholder for labels
371
 
372
 
@@ -449,16 +480,28 @@ def process_video(video_path):
449
 
450
  # Function to get the video stream URL from YouTube using yt-dlp
451
 
452
-
453
  def get_youtube_stream_url(youtube_url):
454
  ydl_opts = {
455
- 'format': 'best[ext=mp4]',
456
  'quiet': True,
 
457
  }
 
458
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
459
- info_dict = ydl.extract_info(youtube_url, download=False)
460
- stream_url = info_dict['url']
461
- return stream_url
462
 
463
 
464
  # If a YouTube URL is provided, process the video
@@ -467,7 +510,11 @@ if youtube_url:
467
 
468
  stream_url = get_youtube_stream_url(youtube_url)
469
 
470
- process_video(stream_url) # Process the video
471
 
472
  # If a video is uploaded or a URL is provided, process the video
473
  if uploaded_video is not None or video_url:
 
20
  # CHANGE CODE BELOW HERE, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
21
  # Update below string to set display title of analysis
22
 
23
+ ANALYSIS_TITLE = "YOLO-8 Object Detection, Pose Estimation, and Action Detection"
24
 
25
+ # Load the YOLOv8 models
26
  pose_model = YOLO("yolov8n-pose.pt")
27
+ object_model = YOLO("yolov8n.pt")
28
 
29
 
30
  def detect_action(keypoints, prev_keypoints=None):
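detect_action() itself is untouched by this commit; only its signature appears above as context. For orientation, a hypothetical sketch of the kind of frame-to-frame heuristic such a signature suggests. The threshold, labels, and logic below are illustrative and are not taken from app.py.

import numpy as np

def detect_action_sketch(keypoints, prev_keypoints=None, move_thresh=5.0):
    # Hypothetical stand-in, not the app's detect_action(): classify a person
    # as "moving" or "still" from mean keypoint displacement between frames.
    if prev_keypoints is None:
        return "unknown"
    # Ultralytics COCO pose keypoints arrive as (1, 17, 3): x, y, confidence.
    cur = keypoints[0][:, :2]
    prev = prev_keypoints[0][:, :2]
    displacement = float(np.linalg.norm(cur - prev, axis=1).mean())
    return "moving" if displacement > move_thresh else "still"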
 
135
  img_container["input"] = frame
136
  frame = frame.copy()
137

138
  detections = []
139
 
140
+ if show_labels in ["Object Detection", "Both"]:
141
+ # Run YOLOv8 object detection on the frame
142
+ object_results = object_model(frame)
143
+
144
+ for i, box in enumerate(object_results[0].boxes):
145
+ class_id = int(box.cls)
146
+ detection = {
147
+ "label": object_model.names[class_id],
148
+ "score": float(box.conf),
149
+ "box_coords": [round(value.item(), 2) for value in box.xyxy.flatten()]
150
+ }
151
+ detections.append(detection)
152
+
153
+ if show_labels in ["Pose Estimation", "Both"]:
154
+ # Run YOLOv8 pose estimation on the frame
155
+ pose_results = pose_model(frame)
156
+
157
+ for i, box in enumerate(pose_results[0].boxes):
158
+ class_id = int(box.cls)
159
+ detection = {
160
+ "label": pose_model.names[class_id],
161
+ "score": float(box.conf),
162
+ "box_coords": [round(value.item(), 2) for value in box.xyxy.flatten()]
163
+ }
164
+
165
+ # Get keypoints for this detection if available
166
+ try:
167
+ if pose_results[0].keypoints is not None:
168
+ keypoints = pose_results[0].keypoints[i].data.cpu().numpy()
169
+
170
+ # Detect action using the keypoints
171
+ prev_keypoints = img_container.get("prev_keypoints")
172
+ action = detect_action(keypoints, prev_keypoints)
173
+ detection["action"] = action
174
+
175
+ # Store current keypoints for next frame
176
+ img_container["prev_keypoints"] = keypoints
177
+
178
+ # Calculate the average position of visible keypoints
179
+ visible_keypoints = keypoints[0][keypoints[0]
180
+ [:, 2] > 0.5][:, :2]
181
+ if len(visible_keypoints) > 0:
182
+ label_x, label_y = np.mean(
183
+ visible_keypoints, axis=0).astype(int)
184
+ else:
185
+ # Fallback to the center of the bounding box if no keypoints are visible
186
+ x1, y1, x2, y2 = detection["box_coords"]
187
+ label_x = int((x1 + x2) / 2)
188
+ label_y = int((y1 + y2) / 2)
189
+ else:
190
+ detection["action"] = "No keypoint data"
191
+ # Use the center of the bounding box for label position
192
+ x1, y1, x2, y2 = detection["box_coords"]
193
+ label_x = int((x1 + x2) / 2)
194
+ label_y = int((y1 + y2) / 2)
195
+ except IndexError:
196
+ detection["action"] = "Action detection failed"
197
+ # Use the center of the bounding box for label position
198
+ x1, y1, x2, y2 = detection["box_coords"]
199
+ label_x = int((x1 + x2) / 2)
200
+ label_y = int((y1 + y2) / 2)
201
+
202
+ # Only display the action as the label
203
+ label = detection.get('action', '')
204
+
205
+ # Increase font scale and thickness to match box label size
206
+ font_scale = 2.0
207
+ thickness = 2
208
+
209
+ # Get text size for label
210
+ (label_width, label_height), _ = cv2.getTextSize(
211
+ label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
212
+
213
+ # Calculate position for centered label
214
+ label_y = label_y - 10 # 10 pixels above the calculated position
215
+
216
+ # Draw yellow background for label
217
+ cv2.rectangle(frame, (label_x - label_width // 2 - 5, label_y - label_height - 5),
218
+ (label_x + label_width // 2 + 5, label_y + 5), (0, 255, 255), -1)
219
+
220
+ # Draw black text for label
221
+ cv2.putText(frame, label, (label_x - label_width // 2, label_y),
222
+ cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
223
+
224
+ detections.append(detection)
225
+
226
+ # Draw detections on the frame
227
+ if show_labels == "Object Detection":
228
+ frame = object_results[0].plot()
229
+ elif show_labels == "Pose Estimation":
230
+ frame = pose_results[0].plot(boxes=False, labels=False, kpt_line=True)
231
+ else: # Both
232
+ frame = object_results[0].plot()
233
+ frame = pose_results[0].plot(
234
+ boxes=False, labels=False, kpt_line=True, img=frame)
235
 
236
  end_time = time.time()
237
  execution_time_ms = round((end_time - start_time) * 1000, 2)
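The label-placement logic above averages only confident keypoints and falls back to the bounding-box centre. A minimal sketch of that step with toy data, assuming the (1, N, 3) x/y/confidence layout the COCO pose models produce (only three keypoints shown):

import numpy as np

# Shape (1, N, 3): each keypoint is (x, y, confidence); toy values only.
keypoints = np.array([[[320.0, 180.0, 0.91],
                       [318.0, 175.0, 0.88],
                       [  0.0,   0.0, 0.02]]])

# Keep the (x, y) of keypoints whose confidence clears 0.5, as in the diff.
visible = keypoints[0][keypoints[0][:, 2] > 0.5][:, :2]
if len(visible) > 0:
    label_x, label_y = np.mean(visible, axis=0).astype(int)
else:
    # Fall back to the centre of the detection's bounding box.
    x1, y1, x2, y2 = 300.0, 160.0, 340.0, 400.0  # toy box coordinates
    label_x, label_y = int((x1 + x2) / 2), int((y1 + y2) / 2)

The final overlay in this hunk relies on ultralytics' Results.plot() accepting an img= argument, so the pose skeleton is drawn onto the frame already annotated with object-detection boxes rather than onto a fresh copy.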
 
354
  # Text input for YouTube URL
355
  st.subheader("Enter a YouTube URL")
356
  youtube_url = st.text_input("YouTube URL")
357
+ yt_error = st.empty() # Placeholder for YouTube error messages
358
 
359
  # File uploader for videos
360
  st.subheader("Upload a Video")
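The new yt_error slot uses Streamlit's placeholder pattern: st.empty() reserves a spot in the layout that later calls can fill with a message or wipe clean. A minimal sketch (the message text is illustrative):

import streamlit as st

yt_error = st.empty()  # reserve an initially invisible slot

# ...later, wherever the URL is handled:
yt_error.error("Unable to process the YouTube URL")  # show an error in place
yt_error.empty()  # clear it again on the next attempt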
 
382
 
383
 
384
  def analysis_init():
385
+ global yt_error, analysis_time, show_labels, labels_placeholder, input_subheader, input_placeholder, output_placeholder
386
+
387
+ yt_error.empty() # Clear any previous YouTube error message
388
 
389
  with col2:
390
  st.header("Analysis")
 
393
  st.subheader("Output Frame")
394
  output_placeholder = st.empty() # Placeholder for output frame
395
  analysis_time = st.empty() # Placeholder for analysis time
396
+ show_labels = st.radio(
397
+ "Choose Detection Type",
398
+ ("Object Detection", "Pose Estimation", "Both"),
399
+ index=2 # Set default to "Both" (index 2)
400
+ )
401
  labels_placeholder = st.empty() # Placeholder for labels
402
 
403
 
 
480
 
481
  # Function to get the video stream URL from YouTube using yt-dlp
482
 
 
483
  def get_youtube_stream_url(youtube_url):
484
  ydl_opts = {
485
+ 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
486
  'quiet': True,
487
+ 'no_warnings': True,
488
  }
489
+
490
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
491
+ try:
492
+ info_dict = ydl.extract_info(youtube_url, download=False)
493
+ if 'url' in info_dict:
494
+ return info_dict['url']
495
+ elif 'entries' in info_dict:
496
+ return info_dict['entries'][0]['url']
497
+ else:
498
+ yt_error.error(
499
+ "Unable to extract video URL. The video might be unavailable or restricted.")
500
+ return None
501
+ except yt_dlp.utils.DownloadError as e:
502
+ yt_error.error(
503
+ f"Error: Unable to process the YouTube URL. {str(e)}")
504
+ return None
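The extra key checks above exist because, with download=False, yt-dlp does not always return a single top-level 'url': a merged selector such as bestvideo+bestaudio may leave the component streams under 'requested_formats', and playlist links come back under 'entries'. A minimal extraction sketch (placeholder URL; a simpler single-format selector assumed):

import yt_dlp

ydl_opts = {
    "format": "best[ext=mp4]/best",  # single-format selector keeps 'url' present
    "quiet": True,
    "no_warnings": True,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info("https://www.youtube.com/watch?v=...", download=False)

# Single video -> top-level 'url'; playlist -> first entry's 'url'.
stream_url = info.get("url") or (info.get("entries") or [{}])[0].get("url")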
505
 
506
 
507
  # If a YouTube URL is provided, process the video
 
510
 
511
  stream_url = get_youtube_stream_url(youtube_url)
512
 
513
+ if stream_url:
514
+ process_video(stream_url) # Process the video
515
+ else:
516
+ yt_error.error(
517
+ "Unable to process the YouTube video. Please try a different URL or video format.")
518
 
519
  # If a video is uploaded or a URL is provided, process the video
520
  if uploaded_video is not None or video_url:
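process_video() is unchanged by this commit; the stream URL works because yt-dlp hands back a direct media URL that OpenCV can open like any other source, provided OpenCV was built with FFmpeg support. A minimal consumption sketch (get_youtube_stream_url, youtube_url, and analyze_frame are app.py's own names; the loop body is illustrative, not process_video's actual implementation):

import cv2

stream_url = get_youtube_stream_url(youtube_url)  # as in the diff
if stream_url:
    cap = cv2.VideoCapture(stream_url)  # the FFmpeg backend handles HTTP(S) URLs
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        analyze_frame(frame)  # per-frame analysis, as elsewhere in app.py
    cap.release()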