eusholli committed
Commit 143a483 · 1 Parent(s): 00a76be

initial commit

Files changed (3)
  1. .gitignore +4 -1
  2. README.md +3 -3
  3. app.py +178 -45
.gitignore CHANGED
@@ -125,4 +125,7 @@ models/*
 !models/.gitkeep
 
 # All cached movie files
-*.mp4
+*.mp4
+
+# All cached model files
+*.pt
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: YOLO v8 Playground
+title: Threat Detection
 emoji: 🦀
 colorFrom: indigo
 colorTo: blue
@@ -10,11 +10,11 @@ pinned: false
 license: mit
 ---
 
-# Yolo v8 Playground
+# Threat Detection
 
 Based on https://huggingface.co/spaces/eusholli/computer-vision-playground.
 
-This Streamlit application uses the Computer Vision Playground as its base and has modified the analyze_frame function, replacing the example face/sentiment detection with object detection.
+This Streamlit application uses the Computer Vision Playground as its base and has modified the analyze_frame function, replacing the example face/sentiment detection with pose detection.
 
 To learn how to do the same yourself and start playing with computer vision models read [here](https://huggingface.co/spaces/eusholli/computer-vision-playground/blob/main/README.md).
 
 
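The retitled README describes replacing the example face/sentiment analysis with YOLOv8 pose detection; the app.py diff below carries the full implementation. A minimal standalone sketch of the core idea, not taken from the commit itself (it assumes the ultralytics package is installed; "frame.jpg" is a placeholder image path):

```python
# Minimal sketch: load the YOLOv8 pose checkpoint and read back detections
# and keypoints, mirroring what the new analyze_frame does per video frame.
from ultralytics import YOLO

pose_model = YOLO("yolov8n-pose.pt")   # pose checkpoint, downloaded on first use

results = pose_model("frame.jpg")      # placeholder path; an np.ndarray frame also works
for i, box in enumerate(results[0].boxes):
    label = pose_model.names[int(box.cls)]                   # "person" for the pose model
    score = float(box.conf)
    keypoints = results[0].keypoints[i].data.cpu().numpy()   # shape (1, 17, 3): x, y, conf
    print(label, round(score, 2), keypoints.shape)
```

The commit goes further: it derives a coarse action label from the keypoints (detect_action) and draws both the detection label and the action onto the frame, as the diff below shows.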
app.py CHANGED
@@ -13,84 +13,217 @@ from streamlit_webrtc import WebRtcMode, webrtc_streamer
 from utils.download import download_file
 from utils.turn import get_ice_servers
 
-from PIL import Image, ImageDraw  # Import PIL for image processing
-from transformers import pipeline  # Import Hugging Face transformers pipeline
-
+from PIL import Image
 import requests
-from io import BytesIO  # Import for handling byte streams
-
+from io import BytesIO
 
 # CHANGE CODE BELOW HERE, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
 # Update below string to set display title of analysis
 
-# Default title - "Facial Sentiment Analysis"
-
-ANALYSIS_TITLE = "YOLO-8 Object Detection Analysis"
+ANALYSIS_TITLE = "YOLO-8 Pose and Efficient Action Detection"
 
-# Load the YOLOv8 model
-model = YOLO("yolov8n.pt")
+# Load the YOLOv8 model for pose estimation
+pose_model = YOLO("yolov8n-pose.pt")
 
 
-# CHANGE THE CONTENTS OF THIS FUNCTION, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
-#
-
-# Set analysis results in img_container and result queue for display
-# img_container["input"] - holds the input frame contents - of type np.ndarray
-# img_container["analyzed"] - holds the analyzed frame with any added annotations - of type np.ndarray
-# img_container["analysis_time"] - holds how long the analysis has taken in miliseconds
-# result_queue - holds the analysis metadata results - of type dictionary
+def detect_action(keypoints, prev_keypoints=None):
+    keypoint_dict = {
+        0: "Nose", 1: "Left Eye", 2: "Right Eye", 3: "Left Ear", 4: "Right Ear",
+        5: "Left Shoulder", 6: "Right Shoulder", 7: "Left Elbow", 8: "Right Elbow",
+        9: "Left Wrist", 10: "Right Wrist", 11: "Left Hip", 12: "Right Hip",
+        13: "Left Knee", 14: "Right Knee", 15: "Left Ankle", 16: "Right Ankle"
+    }
+
+    confidence_threshold = 0.5
+    movement_threshold = 0.05
+
+    def get_keypoint(idx):
+        if idx < len(keypoints[0]):
+            x, y, conf = keypoints[0][idx]
+            return np.array([x, y]) if conf > confidence_threshold else None
+        return None
+
+    def calculate_angle(a, b, c):
+        if a is None or b is None or c is None:
+            return None
+        ba = a - b
+        bc = c - b
+        cosine_angle = np.dot(ba, bc) / \
+            (np.linalg.norm(ba) * np.linalg.norm(bc))
+        angle = np.arccos(cosine_angle)
+        return np.degrees(angle)
+
+    def calculate_movement(current, previous):
+        if current is None or previous is None:
+            return None
+        return np.linalg.norm(current - previous)
+
+    nose = get_keypoint(0)
+    left_shoulder = get_keypoint(5)
+    right_shoulder = get_keypoint(6)
+    left_elbow = get_keypoint(7)
+    right_elbow = get_keypoint(8)
+    left_wrist = get_keypoint(9)
+    right_wrist = get_keypoint(10)
+    left_hip = get_keypoint(11)
+    right_hip = get_keypoint(12)
+    left_knee = get_keypoint(13)
+    right_knee = get_keypoint(14)
+    left_ankle = get_keypoint(15)
+    right_ankle = get_keypoint(16)
+
+    if all(kp is None for kp in [nose, left_shoulder, right_shoulder, left_hip, right_hip, left_ankle, right_ankle]):
+        return "waiting"
+
+    # Calculate midpoints
+    shoulder_midpoint = (left_shoulder + right_shoulder) / \
+        2 if left_shoulder is not None and right_shoulder is not None else None
+    hip_midpoint = (left_hip + right_hip) / \
+        2 if left_hip is not None and right_hip is not None else None
+    ankle_midpoint = (left_ankle + right_ankle) / \
+        2 if left_ankle is not None and right_ankle is not None else None
+
+    # Calculate angles
+    spine_angle = calculate_angle(
+        shoulder_midpoint, hip_midpoint, ankle_midpoint)
+    left_arm_angle = calculate_angle(left_shoulder, left_elbow, left_wrist)
+    right_arm_angle = calculate_angle(right_shoulder, right_elbow, right_wrist)
+    left_leg_angle = calculate_angle(left_hip, left_knee, left_ankle)
+    right_leg_angle = calculate_angle(right_hip, right_knee, right_ankle)
+
+    # Calculate movement
+    movement = None
+    if prev_keypoints is not None:
+        prev_ankle_midpoint = ((prev_keypoints[0][15][:2] + prev_keypoints[0][16][:2]) / 2
+                               if len(prev_keypoints[0]) > 16 else None)
+        movement = calculate_movement(ankle_midpoint, prev_ankle_midpoint)
+
+    # Detect actions
+    if spine_angle is not None:
+        if spine_angle > 160:
+            if movement is not None and movement > movement_threshold:
+                if movement > movement_threshold * 3:
+                    return "running"
+                else:
+                    return "walking"
+            return "standing"
+        elif 70 < spine_angle < 110:
+            return "sitting"
+        elif spine_angle < 30:
+            return "lying"
+
+    # Detect pointing
+    if (left_arm_angle is not None and left_arm_angle > 150) or (right_arm_angle is not None and right_arm_angle > 150):
+        return "pointing"
+
+    # Detect kicking
+    if (left_leg_angle is not None and left_leg_angle > 120) or (right_leg_angle is not None and right_leg_angle > 120):
+        return "kicking"
+
+    # Detect hitting
+    if ((left_arm_angle is not None and 80 < left_arm_angle < 120) or
+            (right_arm_angle is not None and 80 < right_arm_angle < 120)):
+        if movement is not None and movement > movement_threshold * 2:
+            return "hitting"
+
+    return "waiting"
 
 
 def analyze_frame(frame: np.ndarray):
-    start_time = time.time()  # Start timing the analysis
-    img_container["input"] = frame  # Store the input frame
-    frame = frame.copy()  # Create a copy of the frame to modify
+    start_time = time.time()
+    img_container["input"] = frame
+    frame = frame.copy()
 
-    # Run YOLOv8 tracking on the frame, persisting tracks between frames
-    results = model.track(frame, persist=True)
+    # Run YOLOv8 pose estimation on the frame
+    pose_results = pose_model(frame)
 
-    # Initialize a list to store Detection objects
     detections = []
-    object_counter = 1
 
-    # Iterate over the detected boxes
-    for box in results[0].boxes:
-        detection = {}
-        # Extract class id, label, score, and bounding box coordinates
+    for i, box in enumerate(pose_results[0].boxes):
         class_id = int(box.cls)
-
-        detection["id"] = object_counter
-        detection["label"] = model.names[class_id]
-        detection["score"] = float(box.conf)
-        detection["box_coords"] = [round(value.item(), 2)
-                                   for value in box.xyxy.flatten()]
+        detection = {
+            "label": pose_model.names[class_id],
+            "score": float(box.conf),
+            "box_coords": [round(value.item(), 2) for value in box.xyxy.flatten()]
+        }
+
+        # Get keypoints for this detection if available
+        try:
+            if pose_results[0].keypoints is not None:
+                keypoints = pose_results[0].keypoints[i].data.cpu().numpy()
+
+                # Detect action using the keypoints
+                prev_keypoints = img_container.get("prev_keypoints")
+                action = detect_action(keypoints, prev_keypoints)
+                detection["action"] = action
+
+                # Store current keypoints for next frame
+                img_container["prev_keypoints"] = keypoints
+            else:
+                detection["action"] = "No keypoint data"
+        except IndexError:
+            detection["action"] = "Action detection failed"
 
         detections.append(detection)
-        object_counter += 1
 
-    # Visualize the results on the frame
-    frame = results[0].plot()
+    # Draw pose keypoints without bounding boxes
+    frame = pose_results[0].plot(boxes=False, labels=False, kpt_line=True)
+
+    for detection in detections:
+        label = f"{detection['label']} {detection['score']:.2f}"
+        action = detection['action']
+
+        # Get bounding box coordinates
+        x1, y1, x2, y2 = detection["box_coords"]
 
-    end_time = time.time()  # End timing the analysis
-    execution_time_ms = round(
-        (end_time - start_time) * 1000, 2
-    )  # Calculate execution time in milliseconds
-    # Store the execution time
+        # Increase font size and thickness
+        font_scale = 0.7
+        thickness = 2
+
+        # Get text size for label and action
+        (label_width, label_height), _ = cv2.getTextSize(
+            label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
+        (action_width, action_height), _ = cv2.getTextSize(
+            action, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
+
+        # Calculate positions for centered labels at the top of the box
+        label_x = int((x1 + x2) / 2)
+        label_y = int(y1) - 10  # 10 pixels above the top of the box
+        action_y = label_y - label_height - 10  # 10 pixels above the label
+
+        # Draw yellow background for label
+        cv2.rectangle(frame, (label_x - label_width // 2 - 5, label_y - label_height - 5),
+                      (label_x + label_width // 2 + 5, label_y + 5), (0, 255, 255), -1)
+
+        # Draw yellow background for action
+        cv2.rectangle(frame, (label_x - action_width // 2 - 5, action_y - action_height - 5),
+                      (label_x + action_width // 2 + 5, action_y + 5), (0, 255, 255), -1)
+
+        # Draw black text for label
+        cv2.putText(frame, label, (label_x - label_width // 2, label_y),
+                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
+
+        # Draw black text for action
+        cv2.putText(frame, action, (label_x - action_width // 2, action_y),
+                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
+
+    end_time = time.time()
+    execution_time_ms = round((end_time - start_time) * 1000, 2)
     img_container["analysis_time"] = execution_time_ms
 
-    # store the detections
     img_container["detections"] = detections
-    img_container["analyzed"] = frame  # Store the analyzed frame
-
-    return  # End of the function
+    img_container["analyzed"] = frame
 
+    return
 
+#
 #
 #
 # DO NOT TOUCH THE BELOW CODE (NOT NEEDED)
 #
 #
 
+
 # Suppress FFmpeg logs
 os.environ["FFMPEG_LOG_LEVEL"] = "quiet"
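For reference, the new detect_action expects keypoints in the same (1, 17, 3) layout the YOLOv8 pose results return (an x, y, confidence triple per joint), and prev_keypoints may be None on the first frame. A hypothetical smoke test, not part of this commit, assuming detect_action from the new app.py is in scope:

```python
import numpy as np

# Build a synthetic, fully upright skeleton: every joint marked confident
# (conf = 1.0) and all joints on one vertical line, so the spine angle is ~180°.
keypoints = np.zeros((1, 17, 3))
keypoints[..., 2] = 1.0
ys = {0: 0.0, 5: 1.0, 6: 1.0, 7: 1.5, 8: 1.5, 9: 2.0, 10: 2.0,
      11: 2.0, 12: 2.0, 13: 2.5, 14: 2.5, 15: 3.0, 16: 3.0}
for idx, y in ys.items():
    keypoints[0, idx, 1] = y

# With no previous frame there is no measured movement, so the spine-angle
# branch should classify this pose as "standing".
print(detect_action(keypoints))
```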