Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -2,6 +2,7 @@ import cv2
|
|
2 |
import mediapipe as mp
|
3 |
import numpy as np
|
4 |
|
|
|
5 |
correct = cv2.imread('right.png')
|
6 |
correct = cv2.cvtColor(correct, cv2.COLOR_BGR2RGB)
|
7 |
incorrect = cv2.imread('wrong.png')
|
@@ -9,19 +10,30 @@ incorrect = cv2.cvtColor(incorrect, cv2.COLOR_BGR2RGB)
|
|
9 |
|
10 |
def draw_rounded_rect(img, rect_start, rect_end, corner_width, box_color):
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
x1, y1 = rect_start
|
13 |
x2, y2 = rect_end
|
14 |
w = corner_width
|
15 |
|
16 |
-
#
|
17 |
cv2.rectangle(img, (x1 + w, y1), (x2 - w, y1 + w), box_color, -1)
|
18 |
cv2.rectangle(img, (x1 + w, y2 - w), (x2 - w, y2), box_color, -1)
|
19 |
cv2.rectangle(img, (x1, y1 + w), (x1 + w, y2 - w), box_color, -1)
|
20 |
cv2.rectangle(img, (x2 - w, y1 + w), (x2, y2 - w), box_color, -1)
|
21 |
cv2.rectangle(img, (x1 + w, y1 + w), (x2 - w, y2 - w), box_color, -1)
|
22 |
|
23 |
-
|
24 |
-
# draw filled ellipses
|
25 |
cv2.ellipse(img, (x1 + w, y1 + w), (w, w),
|
26 |
angle = 0, startAngle = -90, endAngle = -180, color = box_color, thickness = -1)
|
27 |
|
@@ -36,12 +48,21 @@ def draw_rounded_rect(img, rect_start, rect_end, corner_width, box_color):
|
|
36 |
|
37 |
return img
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
def draw_dotted_line(frame, lm_coord, start, end, line_color):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
pix_step = 0
|
44 |
|
|
|
45 |
for i in range(start, end+1, 8):
|
46 |
cv2.circle(frame, (lm_coord[0], i+pix_step), 2, line_color, -1, lineType=cv2.LINE_AA)
|
47 |
|
@@ -61,33 +82,62 @@ def draw_text(
|
|
61 |
overlay_image = False,
|
62 |
overlay_type = None
|
63 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
offset = box_offset
|
66 |
x, y = pos
|
|
|
|
|
67 |
text_size, _ = cv2.getTextSize(msg, font, font_scale, font_thickness)
|
68 |
text_w, text_h = text_size
|
69 |
|
|
|
70 |
rec_start = tuple(p - o for p, o in zip(pos, offset))
|
71 |
rec_end = tuple(m + n - o for m, n, o in zip((x + text_w, y + text_h), offset, (25, 0)))
|
72 |
|
73 |
resize_height = 0
|
74 |
|
|
|
|
|
75 |
if overlay_image:
|
76 |
resize_height = rec_end[1] - rec_start[1]
|
77 |
-
|
78 |
-
|
|
|
79 |
img = draw_rounded_rect(img, rec_start, (rec_end[0]+resize_height, rec_end[1]), width, text_color_bg)
|
|
|
|
|
80 |
if overlay_type == "correct":
|
81 |
overlay_res = cv2.resize(correct, (resize_height, resize_height), interpolation = cv2.INTER_AREA)
|
82 |
elif overlay_type == "incorrect":
|
83 |
overlay_res = cv2.resize(incorrect, (resize_height, resize_height), interpolation = cv2.INTER_AREA)
|
84 |
|
|
|
85 |
img[rec_start[1]:rec_start[1]+resize_height, rec_start[0]+width:rec_start[0]+width+resize_height] = overlay_res
|
86 |
|
87 |
else:
|
88 |
img = draw_rounded_rect(img, rec_start, rec_end, width, text_color_bg)
|
89 |
|
90 |
|
|
|
91 |
cv2.putText(
|
92 |
img,
|
93 |
msg,
|
@@ -99,39 +149,69 @@ def draw_text(
|
|
99 |
cv2.LINE_AA,
|
100 |
)
|
101 |
|
102 |
-
|
103 |
-
|
104 |
return text_size
|
105 |
|
106 |
-
|
107 |
-
|
108 |
def find_angle(p1, p2, ref_pt = np.array([0,0])):
    """
    Compute the angle, in whole degrees, between two points as seen from a reference point.

    Args:
        p1: The first point coordinates as a NumPy array (x, y).
        p2: The second point coordinates as a NumPy array (x, y).
        ref_pt: The point the angle is measured at, as a NumPy array (default: [0, 0]).

    Returns:
        The angle between the vectors ref_pt->p1 and ref_pt->p2 in degrees,
        truncated to an int.
    """
    # Express both points relative to the reference point.
    p1_ref = p1 - ref_pt
    p2_ref = p2 - ref_pt

    # Cosine of the angle from the dot product divided by the vector lengths.
    # NOTE(review): if either point coincides with ref_pt the length is zero and
    # the result is NaN, which int() below cannot convert - confirm callers never do this.
    cos_theta = np.dot(p1_ref, p2_ref) / (np.linalg.norm(p1_ref) * np.linalg.norm(p2_ref))

    # Clip so floating-point round-off cannot push the value outside arccos's domain.
    theta = np.arccos(np.clip(cos_theta, -1.0, 1.0))

    # Convert radians to degrees with the full-precision factor. The previous
    # int(180 / np.pi) truncated the factor to 57 and under-reported every angle.
    degree = np.degrees(theta)
    return int(degree)
|
118 |
|
|
|
|
|
|
|
119 |
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
|
125 |
denorm_x = int(pose_landmark[key].x * frame_width)
|
126 |
denorm_y = int(pose_landmark[key].y * frame_height)
|
127 |
|
128 |
return np.array([denorm_x, denorm_y])
|
129 |
|
|
|
|
|
|
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
|
|
|
|
|
|
132 |
|
133 |
-
|
134 |
-
|
135 |
if feature == 'nose':
|
136 |
return get_landmark_array(kp_results, dict_features[feature], frame_width, frame_height)
|
137 |
|
@@ -149,8 +229,20 @@ def get_landmark_features(kp_results, dict_features, feature, frame_width, frame
|
|
149 |
else:
|
150 |
raise ValueError("feature needs to be either 'nose', 'left' or 'right")
|
151 |
|
152 |
-
|
153 |
def get_mediapipe_pose(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
static_image_mode = False,
|
155 |
model_complexity = 1,
|
156 |
smooth_landmarks = True,
|
|
|
2 |
import mediapipe as mp
|
3 |
import numpy as np
|
4 |
|
5 |
+
# Load the correct and incorrect posture images and convert them from BGR to RGB
|
6 |
correct = cv2.imread('right.png')
|
7 |
correct = cv2.cvtColor(correct, cv2.COLOR_BGR2RGB)
|
8 |
incorrect = cv2.imread('wrong.png')
|
|
|
10 |
|
11 |
def draw_rounded_rect(img, rect_start, rect_end, corner_width, box_color):
|
12 |
|
13 |
+
"""
|
14 |
+
This function draws a rectangle with rounded corners on an image.
|
15 |
+
|
16 |
+
Args:
|
17 |
+
img: The image to draw on.
|
18 |
+
rect_start: The top-left corner of the rectangle as a tuple (x1, y1).
|
19 |
+
rect_end: The bottom-right corner of the rectangle as a tuple (x2, y2).
|
20 |
+
corner_width: The width of the rounded corners.
|
21 |
+
box_color: The color of the rectangle in BGR format.
|
22 |
+
"""
|
23 |
+
|
24 |
+
|
25 |
x1, y1 = rect_start
|
26 |
x2, y2 = rect_end
|
27 |
w = corner_width
|
28 |
|
29 |
+
# Draw filled rectangles for each side of the box
|
30 |
cv2.rectangle(img, (x1 + w, y1), (x2 - w, y1 + w), box_color, -1)
|
31 |
cv2.rectangle(img, (x1 + w, y2 - w), (x2 - w, y2), box_color, -1)
|
32 |
cv2.rectangle(img, (x1, y1 + w), (x1 + w, y2 - w), box_color, -1)
|
33 |
cv2.rectangle(img, (x2 - w, y1 + w), (x2, y2 - w), box_color, -1)
|
34 |
cv2.rectangle(img, (x1 + w, y1 + w), (x2 - w, y2 - w), box_color, -1)
|
35 |
|
36 |
+
# Draw filled ellipses for the corners
|
|
|
37 |
cv2.ellipse(img, (x1 + w, y1 + w), (w, w),
|
38 |
angle = 0, startAngle = -90, endAngle = -180, color = box_color, thickness = -1)
|
39 |
|
|
|
48 |
|
49 |
return img
|
50 |
|
|
|
|
|
|
|
51 |
def draw_dotted_line(frame, lm_coord, start, end, line_color):
|
52 |
+
"""
|
53 |
+
This function draws a dotted line on a frame based on landmark coordinates.
|
54 |
+
|
55 |
+
Args:
|
56 |
+
frame: The image to draw on.
|
57 |
+
lm_coord: The landmark coordinates as a NumPy array.
|
58 |
+
start: The y pixel coordinate at which the dotted line begins.
|
59 |
+
end: The y pixel coordinate at which the dotted line ends (inclusive).
|
60 |
+
line_color: The color of the line in BGR format.
|
61 |
+
"""
|
62 |
+
|
63 |
pix_step = 0
|
64 |
|
65 |
+
# Draw a filled circle every 8 pixels along y from start to end, at x = lm_coord[0]
|
66 |
for i in range(start, end+1, 8):
|
67 |
cv2.circle(frame, (lm_coord[0], i+pix_step), 2, line_color, -1, lineType=cv2.LINE_AA)
|
68 |
|
|
|
82 |
overlay_image = False,
|
83 |
overlay_type = None
|
84 |
):
|
85 |
+
"""
|
86 |
+
This function draws text with a customizable background box on an image.
|
87 |
+
|
88 |
+
Args:
|
89 |
+
img: The image to draw on.
|
90 |
+
msg: The message to display as a string.
|
91 |
+
width: The corner width passed to draw_rounded_rect for the background box (default: 7).
|
92 |
+
font: The font style for the text (default: cv2.FONT_HERSHEY_SIMPLEX).
|
93 |
+
pos: The top-left corner coordinates of the text box (default: (0, 0)).
|
94 |
+
font_scale: The scaling factor for the font size (default: 1).
|
95 |
+
font_thickness: The thickness of the text (default: 2).
|
96 |
+
text_color: The color of the text in BGR format (default: green - (0, 255, 0)).
|
97 |
+
text_color_bg: The color of the background box in BGR format (default: black - (0, 0, 0)).
|
98 |
+
box_offset: The offset for the background box relative to the text (default: (20, 10)).
|
99 |
+
overlay_image: Flag to display an overlay image inside the box (default: False).
|
100 |
+
overlay_type: Type of overlay image ("correct" or "incorrect") - used when overlay_image is True.
|
101 |
+
Returns:
|
102 |
+
The size of the drawn text (width, height) as a tuple, as returned by cv2.getTextSize.
|
103 |
+
"""
|
104 |
|
105 |
offset = box_offset
|
106 |
x, y = pos
|
107 |
+
|
108 |
+
# Get the size of the text with the specified font and scale
|
109 |
text_size, _ = cv2.getTextSize(msg, font, font_scale, font_thickness)
|
110 |
text_w, text_h = text_size
|
111 |
|
112 |
+
# Calculate the top-left and bottom-right corners of the text box with padding
|
113 |
rec_start = tuple(p - o for p, o in zip(pos, offset))
|
114 |
rec_end = tuple(m + n - o for m, n, o in zip((x + text_w, y + text_h), offset, (25, 0)))
|
115 |
|
116 |
resize_height = 0
|
117 |
|
118 |
+
|
119 |
+
# Handle overlay image logic
|
120 |
if overlay_image:
|
121 |
resize_height = rec_end[1] - rec_start[1]
|
122 |
+
|
123 |
+
|
124 |
+
# Draw a rounded rectangle box with the background color
|
125 |
img = draw_rounded_rect(img, rec_start, (rec_end[0]+resize_height, rec_end[1]), width, text_color_bg)
|
126 |
+
|
127 |
+
# Resize the overlay image based on the box height
|
128 |
if overlay_type == "correct":
|
129 |
overlay_res = cv2.resize(correct, (resize_height, resize_height), interpolation = cv2.INTER_AREA)
|
130 |
elif overlay_type == "incorrect":
|
131 |
overlay_res = cv2.resize(incorrect, (resize_height, resize_height), interpolation = cv2.INTER_AREA)
|
132 |
|
133 |
+
# Overlay the resized image onto the background box
|
134 |
img[rec_start[1]:rec_start[1]+resize_height, rec_start[0]+width:rec_start[0]+width+resize_height] = overlay_res
|
135 |
|
136 |
else:
|
137 |
img = draw_rounded_rect(img, rec_start, rec_end, width, text_color_bg)
|
138 |
|
139 |
|
140 |
+
# Draw the text onto the image with specified parameters
|
141 |
cv2.putText(
|
142 |
img,
|
143 |
msg,
|
|
|
149 |
cv2.LINE_AA,
|
150 |
)
|
151 |
|
|
|
|
|
152 |
return text_size
|
153 |
|
|
|
|
|
154 |
def find_angle(p1, p2, ref_pt = np.array([0,0])):
    """
    Compute the angle, in whole degrees, between two points as seen from a reference point.

    Args:
        p1: The first point coordinates as a NumPy array (x, y).
        p2: The second point coordinates as a NumPy array (x, y).
        ref_pt: The point the angle is measured at, as a NumPy array (default: [0, 0]).

    Returns:
        The angle between the vectors ref_pt->p1 and ref_pt->p2 in degrees,
        truncated to an int.
    """
    # Express both points relative to the reference point.
    p1_ref = p1 - ref_pt
    p2_ref = p2 - ref_pt

    # Cosine of the angle from the dot product divided by the vector lengths.
    # NOTE(review): if either point coincides with ref_pt the length is zero and
    # the result is NaN, which int() below cannot convert - confirm callers never do this.
    cos_theta = np.dot(p1_ref, p2_ref) / (np.linalg.norm(p1_ref) * np.linalg.norm(p2_ref))

    # Clip so floating-point round-off cannot push the value outside arccos's domain.
    theta = np.arccos(np.clip(cos_theta, -1.0, 1.0))

    # Convert radians to degrees with the full-precision factor. The previous
    # int(180 / np.pi) truncated the factor to 57 and under-reported every angle.
    degree = np.degrees(theta)
    return int(degree)
|
179 |
|
180 |
+
def get_landmark_array(pose_landmark, key, frame_width, frame_height):
    """
    Convert one landmark's x/y values into integer frame coordinates.

    Args:
        pose_landmark: An indexable collection whose items expose .x and .y
            (presumably MediaPipe pose landmarks with values in [0, 1] - confirm with caller).
        key: The index or key used to look up the landmark in pose_landmark.
        frame_width: The width of the image frame; scales the landmark's x.
        frame_height: The height of the image frame; scales the landmark's y.

    Returns:
        A NumPy array [x, y] holding the scaled coordinates truncated to ints.
    """
    landmark = pose_landmark[key]

    # Scale each component by the matching frame dimension, then truncate to int.
    pixel_xy = [int(landmark.x * frame_width), int(landmark.y * frame_height)]

    return np.array(pixel_xy)
|
198 |
|
199 |
+
def get_landmark_features(kp_results, dict_features, feature, frame_width, frame_height):
|
200 |
+
"""
|
201 |
+
This function extracts landmark coordinates for various body parts based on a feature name.
|
202 |
|
203 |
+
Args:
|
204 |
+
kp_results: The MediaPipe pose landmark results object.
|
205 |
+
dict_features: A dictionary containing landmark key names for different body parts.
|
206 |
+
feature: The name of the body part feature to extract (e.g., 'nose', 'left', 'right').
|
207 |
+
frame_width: The width of the image frame.
|
208 |
+
frame_height: The height of the image frame.
|
209 |
|
210 |
+
Returns:
|
211 |
+
A list containing the landmark coordinates (as NumPy arrays) or raises an error if the feature is invalid.
|
212 |
+
"""
|
213 |
|
214 |
+
|
|
|
215 |
if feature == 'nose':
|
216 |
return get_landmark_array(kp_results, dict_features[feature], frame_width, frame_height)
|
217 |
|
|
|
229 |
else:
|
230 |
raise ValueError("feature needs to be either 'nose', 'left' or 'right")
|
231 |
|
|
|
232 |
def get_mediapipe_pose(
|
233 |
+
''''
|
234 |
+
This function creates a MediaPipe Pose object for human pose estimation.
|
235 |
+
|
236 |
+
Args:
|
237 |
+
static_image_mode: Flag for processing a single static image (default: False).
|
238 |
+
model_complexity: Level of complexity for the pose model (default: 1).
|
239 |
+
smooth_landmarks: Enable smoothing of detected landmarks (default: True).
|
240 |
+
min_detection_confidence: Minimum confidence threshold for person detection (default: 0.5).
|
241 |
+
min_tracking_confidence: Minimum confidence threshold for pose tracking (default: 0.5).
|
242 |
+
|
243 |
+
Returns:
|
244 |
+
A MediaPipe Pose object.
|
245 |
+
''''
|
246 |
static_image_mode = False,
|
247 |
model_complexity = 1,
|
248 |
smooth_landmarks = True,
|