initial commit

- .gitignore +4 -1
- README.md +3 -3
- app.py +178 -45
.gitignore
CHANGED
@@ -125,4 +125,7 @@ models/*
 !models/.gitkeep
 
 # All cached movie files
-*.mp4
+*.mp4
+
+# All cached model files
+*.pt
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: Threat Detection
 emoji: 🦀
 colorFrom: indigo
 colorTo: blue
@@ -10,11 +10,11 @@ pinned: false
 license: mit
 ---
 
-#
+# Threat Detection
 
 Based on https://huggingface.co/spaces/eusholli/computer-vision-playground.
 
-This Streamlit application uses the Computer Vision Playground as its base and has modified the analyze_frame function, replacing the example face/sentiment detection with
+This Streamlit application uses the Computer Vision Playground as its base and has modified the analyze_frame function, replacing the example face/sentiment detection with pose detection.
 
 To learn how to do the same yourself and start playing with computer vision models read [here](https://huggingface.co/spaces/eusholli/computer-vision-playground/blob/main/README.md).
 
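The change the README describes lives in analyze_frame in app.py (diff below). For orientation, here is a minimal sketch of how a playground-style app typically hands webcam frames to that function via streamlit_webrtc; the callback wiring, the streamer key, and the img_container shape are assumptions based on the README and the app.py code, not a copy of this Space's exact file.

import av
import numpy as np
from streamlit_webrtc import WebRtcMode, webrtc_streamer

# Shared container mirroring the img_container used in app.py (assumed shape).
img_container = {"input": None, "analyzed": None}


def analyze_frame(frame: np.ndarray) -> None:
    # Placeholder: the real app runs YOLOv8 pose estimation here and writes
    # the annotated frame into img_container["analyzed"].
    img_container["input"] = frame
    img_container["analyzed"] = frame


def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    img = frame.to_ndarray(format="bgr24")  # webcam frame as a BGR numpy array
    analyze_frame(img)
    analyzed = img_container["analyzed"]
    return av.VideoFrame.from_ndarray(analyzed if analyzed is not None else img, format="bgr24")


# Hypothetical streamer setup for illustration; the real app also passes TURN
# servers obtained from utils.turn.get_ice_servers().
webrtc_streamer(
    key="pose-detection-example",
    mode=WebRtcMode.SENDRECV,
    video_frame_callback=video_frame_callback,
)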
app.py
CHANGED
@@ -13,84 +13,217 @@ from streamlit_webrtc import WebRtcMode, webrtc_streamer
 from utils.download import download_file
 from utils.turn import get_ice_servers
 
-from PIL import Image
-from transformers import pipeline  # Import Hugging Face transformers pipeline
-
+from PIL import Image
 import requests
-from io import BytesIO
-
+from io import BytesIO
 
 # CHANGE CODE BELOW HERE, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
 # Update below string to set display title of analysis
 
-
-
-ANALYSIS_TITLE = "YOLO-8 Object Detection Analysis"
+ANALYSIS_TITLE = "YOLO-8 Pose and Efficient Action Detection"
 
-# Load the YOLOv8 model
-model = YOLO("yolov8n.pt")
+# Load the YOLOv8 model for pose estimation
+pose_model = YOLO("yolov8n-pose.pt")
 
 
-
-
+def detect_action(keypoints, prev_keypoints=None):
+    keypoint_dict = {
+        0: "Nose", 1: "Left Eye", 2: "Right Eye", 3: "Left Ear", 4: "Right Ear",
+        5: "Left Shoulder", 6: "Right Shoulder", 7: "Left Elbow", 8: "Right Elbow",
+        9: "Left Wrist", 10: "Right Wrist", 11: "Left Hip", 12: "Right Hip",
+        13: "Left Knee", 14: "Right Knee", 15: "Left Ankle", 16: "Right Ankle"
+    }
+
+    confidence_threshold = 0.5
+    movement_threshold = 0.05
+
+    def get_keypoint(idx):
+        if idx < len(keypoints[0]):
+            x, y, conf = keypoints[0][idx]
+            return np.array([x, y]) if conf > confidence_threshold else None
+        return None
+
+    def calculate_angle(a, b, c):
+        if a is None or b is None or c is None:
+            return None
+        ba = a - b
+        bc = c - b
+        cosine_angle = np.dot(ba, bc) / \
+            (np.linalg.norm(ba) * np.linalg.norm(bc))
+        angle = np.arccos(cosine_angle)
+        return np.degrees(angle)
+
+    def calculate_movement(current, previous):
+        if current is None or previous is None:
+            return None
+        return np.linalg.norm(current - previous)
+
+    nose = get_keypoint(0)
+    left_shoulder = get_keypoint(5)
+    right_shoulder = get_keypoint(6)
+    left_elbow = get_keypoint(7)
+    right_elbow = get_keypoint(8)
+    left_wrist = get_keypoint(9)
+    right_wrist = get_keypoint(10)
+    left_hip = get_keypoint(11)
+    right_hip = get_keypoint(12)
+    left_knee = get_keypoint(13)
+    right_knee = get_keypoint(14)
+    left_ankle = get_keypoint(15)
+    right_ankle = get_keypoint(16)
+
+    if all(kp is None for kp in [nose, left_shoulder, right_shoulder, left_hip, right_hip, left_ankle, right_ankle]):
+        return "waiting"
+
+    # Calculate midpoints
+    shoulder_midpoint = (left_shoulder + right_shoulder) / \
+        2 if left_shoulder is not None and right_shoulder is not None else None
+    hip_midpoint = (left_hip + right_hip) / \
+        2 if left_hip is not None and right_hip is not None else None
+    ankle_midpoint = (left_ankle + right_ankle) / \
+        2 if left_ankle is not None and right_ankle is not None else None
+
+    # Calculate angles
+    spine_angle = calculate_angle(
+        shoulder_midpoint, hip_midpoint, ankle_midpoint)
+    left_arm_angle = calculate_angle(left_shoulder, left_elbow, left_wrist)
+    right_arm_angle = calculate_angle(right_shoulder, right_elbow, right_wrist)
+    left_leg_angle = calculate_angle(left_hip, left_knee, left_ankle)
+    right_leg_angle = calculate_angle(right_hip, right_knee, right_ankle)
+
+    # Calculate movement
+    movement = None
+    if prev_keypoints is not None:
+        prev_ankle_midpoint = ((prev_keypoints[0][15][:2] + prev_keypoints[0][16][:2]) / 2
+                               if len(prev_keypoints[0]) > 16 else None)
+        movement = calculate_movement(ankle_midpoint, prev_ankle_midpoint)
+
+    # Detect actions
+    if spine_angle is not None:
+        if spine_angle > 160:
+            if movement is not None and movement > movement_threshold:
+                if movement > movement_threshold * 3:
+                    return "running"
+                else:
+                    return "walking"
+            return "standing"
+        elif 70 < spine_angle < 110:
+            return "sitting"
+        elif spine_angle < 30:
+            return "lying"
+
+    # Detect pointing
+    if (left_arm_angle is not None and left_arm_angle > 150) or (right_arm_angle is not None and right_arm_angle > 150):
+        return "pointing"
+
+    # Detect kicking
+    if (left_leg_angle is not None and left_leg_angle > 120) or (right_leg_angle is not None and right_leg_angle > 120):
+        return "kicking"
+
+    # Detect hitting
+    if ((left_arm_angle is not None and 80 < left_arm_angle < 120) or
+            (right_arm_angle is not None and 80 < right_arm_angle < 120)):
+        if movement is not None and movement > movement_threshold * 2:
+            return "hitting"
+
+    return "waiting"
 
 
-# Set analysis results in img_container and result queue for display
-# img_container["input"] - holds the input frame contents - of type np.ndarray
-# img_container["analyzed"] - holds the analyzed frame with any added annotations - of type np.ndarray
-# img_container["analysis_time"] - holds how long the analysis has taken in miliseconds
-# result_queue - holds the analysis metadata results - of type dictionary
 def analyze_frame(frame: np.ndarray):
-    start_time = time.time()
-    img_container["input"] = frame
-    frame = frame.copy()
+    start_time = time.time()
+    img_container["input"] = frame
+    frame = frame.copy()
 
-    # Run YOLOv8
-
+    # Run YOLOv8 pose estimation on the frame
+    pose_results = pose_model(frame)
 
-    # Initialize a list to store Detection objects
     detections = []
-    object_counter = 1
 
-
-    for box in results[0].boxes:
-        detection = {}
-        # Extract class id, label, score, and bounding box coordinates
+    for i, box in enumerate(pose_results[0].boxes):
         class_id = int(box.cls)
+        detection = {
+            "label": pose_model.names[class_id],
+            "score": float(box.conf),
+            "box_coords": [round(value.item(), 2) for value in box.xyxy.flatten()]
+        }
+
+        # Get keypoints for this detection if available
+        try:
+            if pose_results[0].keypoints is not None:
+                keypoints = pose_results[0].keypoints[i].data.cpu().numpy()
+
+                # Detect action using the keypoints
+                prev_keypoints = img_container.get("prev_keypoints")
+                action = detect_action(keypoints, prev_keypoints)
+                detection["action"] = action
 
-
-
-
-
-
+                # Store current keypoints for next frame
+                img_container["prev_keypoints"] = keypoints
+            else:
+                detection["action"] = "No keypoint data"
+        except IndexError:
+            detection["action"] = "Action detection failed"
 
         detections.append(detection)
-        object_counter += 1
 
-    #
-    frame =
+    # Draw pose keypoints without bounding boxes
+    frame = pose_results[0].plot(boxes=False, labels=False, kpt_line=True)
+
+    for detection in detections:
+        label = f"{detection['label']} {detection['score']:.2f}"
+        action = detection['action']
+
+        # Get bounding box coordinates
+        x1, y1, x2, y2 = detection["box_coords"]
 
-
-
-
-
-
+        # Increase font size and thickness
+        font_scale = 0.7
+        thickness = 2
+
+        # Get text size for label and action
+        (label_width, label_height), _ = cv2.getTextSize(
+            label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
+        (action_width, action_height), _ = cv2.getTextSize(
+            action, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
+
+        # Calculate positions for centered labels at the top of the box
+        label_x = int((x1 + x2) / 2)
+        label_y = int(y1) - 10  # 10 pixels above the top of the box
+        action_y = label_y - label_height - 10  # 10 pixels above the label
+
+        # Draw yellow background for label
+        cv2.rectangle(frame, (label_x - label_width // 2 - 5, label_y - label_height - 5),
+                      (label_x + label_width // 2 + 5, label_y + 5), (0, 255, 255), -1)
+
+        # Draw yellow background for action
+        cv2.rectangle(frame, (label_x - action_width // 2 - 5, action_y - action_height - 5),
+                      (label_x + action_width // 2 + 5, action_y + 5), (0, 255, 255), -1)
+
+        # Draw black text for label
+        cv2.putText(frame, label, (label_x - label_width // 2, label_y),
+                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
+
+        # Draw black text for action
+        cv2.putText(frame, action, (label_x - action_width // 2, action_y),
+                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
+
+    end_time = time.time()
+    execution_time_ms = round((end_time - start_time) * 1000, 2)
     img_container["analysis_time"] = execution_time_ms
 
-    # store the detections
     img_container["detections"] = detections
-    img_container["analyzed"] = frame
-
-    return  # End of the function
+    img_container["analyzed"] = frame
 
+    return
 
+#
 #
 #
 # DO NOT TOUCH THE BELOW CODE (NOT NEEDED)
 #
 #
 
+
 # Suppress FFmpeg logs
 os.environ["FFMPEG_LOG_LEVEL"] = "quiet"
 
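As a rough sanity check on the thresholds in detect_action, the standalone snippet below reproduces its spine-angle and ankle-movement arithmetic on hand-made keypoints. The coordinates are invented for illustration only; the angle helper mirrors the one in app.py, with a clip added for numerical safety.

import numpy as np


def calculate_angle(a, b, c):
    # Angle at vertex b in degrees, as in detect_action (clip added for safety).
    ba, bc = a - b, c - b
    cosine = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    return np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0)))


# Upright pose: shoulders directly above hips, hips above ankles.
shoulder_mid = np.array([100.0, 50.0])
hip_mid = np.array([100.0, 150.0])
ankle_mid = np.array([100.0, 300.0])
print(calculate_angle(shoulder_mid, hip_mid, ankle_mid))  # 180.0 -> > 160, "standing" branch

# Torso folded forward at the hips, as when seated.
shoulder_mid_seated = np.array([180.0, 150.0])
print(calculate_angle(shoulder_mid_seated, hip_mid, ankle_mid))  # 90.0 -> 70-110, "sitting" branch

# Ankle-midpoint displacement between frames, compared with movement_threshold
# (0.05) and 3x that value for the walking/running split.
prev_ankle_mid = np.array([100.0, 300.0])
curr_ankle_mid = np.array([100.3, 300.0])
movement = np.linalg.norm(curr_ankle_mid - prev_ankle_mid)
print(movement > 0.05, movement > 0.05 * 3)  # True, True

Because the keypoints come back in pixel coordinates, even this 0.3-pixel shift clears both thresholds, so at typical frame resolutions the walking/running split is driven by any detectable ankle motion between frames.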