max-unfinity committed on
Commit
c66f90e
1 Parent(s): 7589132
Files changed (5) hide show
  1. .gitignore +3 -0
  2. Dockerfile +10 -1
  3. app.py +101 -0
  4. infer.py +74 -0
  5. yolov8-test.ipynb +146 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.pt
2
+ /images
3
+ __pycache__
Dockerfile CHANGED
@@ -1,3 +1,12 @@
1
  FROM nvcr.io/nvidia/pytorch:23.12-py3
2
 
3
- RUN pip install ultralytics
 
 
 
 
 
 
 
 
 
 
1
# Base image: NVIDIA's PyTorch container.
FROM nvcr.io/nvidia/pytorch:23.12-py3

# Python dependencies. The pinned installs run after ultralytics so the pins
# are what ends up installed.
RUN pip install ultralytics
RUN pip install streamlit
RUN pip install opencv-python==4.6.0.66
RUN pip install Pillow==10.3.0

# app.py loads "DejaVuSerif-Bold.ttf" by name, so the DejaVu fonts must exist
# in the image. `-y` is required: the build has no terminal to answer apt's
# confirmation prompt, and without it the install aborts.
RUN apt-get update \
    && apt-get install -y --no-install-recommends fonts-dejavu \
    && rm -rf /var/lib/apt/lists/*

# Port the Streamlit server is expected to listen on.
EXPOSE 8501
CMD streamlit run app.py \
    --server.headless true
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ from PIL import Image, ImageDraw, ImageFont
4
+ from ultralytics import YOLO
5
+ import torch
6
+ import infer
7
+
8
+
9
@st.cache_resource()
def load_model():
    """Build the YOLOv8 pose model and move it to the available device.

    Wrapped in ``st.cache_resource`` so the weights are not reloaded on each
    call. Uses CUDA when ``torch.cuda.is_available()`` is true, otherwise CPU.

    Returns:
        The ``YOLO`` instance created from ``'yolov8l-pose.pt'``.
    """
    print('Loading model...')
    target_device = 'cpu'
    if torch.cuda.is_available():
        target_device = 'cuda'
    pose_model = YOLO('yolov8l-pose.pt')
    pose_model.to(target_device)
    return pose_model
16
+
17
+
18
def draw_output(image_pil: Image.Image, keypoints: dict):
    """Annotate ``image_pil`` in place with the head and elbow angles.

    Draws, when the required keypoints are present:
      * a red line from the ear through the eye (extended by
        ``infer.extend_line``), a gray horizontal reference line of the same
        length starting at the ear, and the angle between them as text;
      * for each arm, the shoulder-elbow-wrist polyline in blue and the elbow
        angle as text at the elbow.

    Args:
        image_pil: Image to draw on. It is modified in place.
        keypoints: Mapping of keypoint name to an ``(x, y)`` tuple, or ``None``
            for a keypoint that is not available (as built by
            ``infer.get_keypoints``).

    Returns:
        The same ``image_pil`` object, after drawing.
    """
    draw = ImageDraw.Draw(image_pil)
    line_width = 10
    try:
        font = ImageFont.truetype("DejaVuSerif-Bold.ttf", 70)
    except OSError:
        # The font file is not installed on this machine; keep annotating
        # with Pillow's built-in font instead of failing the whole request.
        font = ImageFont.load_default()

    # Use the left ear/eye pair when both are present, else the right pair.
    ear, eye = None, None
    if keypoints["left_ear"] and keypoints["left_eye"]:
        ear = keypoints["left_ear"]
        eye = keypoints["left_eye"]
    elif keypoints["right_ear"] and keypoints["right_eye"]:
        ear = keypoints["right_ear"]
        eye = keypoints["right_eye"]

    # Draw the extended ear->eye line for whichever side was selected.
    if ear and eye:
        sight_end = infer.extend_line(ear, eye, 3)
        sight_line = [ear, sight_end]
        draw.line(sight_line, fill='red', width=line_width)

        # Horizontal reference line from the ear, pointing the same way along
        # x as the sight line and with the same length.
        ear_xy = np.array(ear)
        sight_vector = np.array(sight_end) - ear_xy
        x_direction = np.sign(sight_vector)[0]
        sight_length = np.linalg.norm(sight_vector)
        horizon_end = ear_xy + np.array([sight_length * x_direction, 0])
        ear = tuple(ear_xy.tolist())
        horizon_line = [ear, tuple(horizon_end.tolist())]
        draw.line(horizon_line, fill='gray', width=line_width // 2)

        # Label the angle between the sight line and the horizontal.
        angle = infer.calculate_angle_to_horizontal(sight_vector)
        draw.text(ear, f'{angle:.2f}', fill='red', font=font)

    # Elbow angles. Compare against None explicitly: 0.0 is a valid angle and
    # must not be skipped by a truthiness test.
    left_elbow_angle, right_elbow_angle = infer.get_elbow_angles(keypoints)
    if left_elbow_angle is not None:
        draw.text(keypoints['left_elbow'], f'{left_elbow_angle:.2f}', fill='red', font=font)
        # Polyline for the left arm.
        draw.line(
            [keypoints['left_shoulder'], keypoints['left_elbow'], keypoints['left_wrist']],
            fill='blue',
            width=line_width,
        )
    if right_elbow_angle is not None:
        draw.text(keypoints['right_elbow'], f'{right_elbow_angle:.2f}', fill='red', font=font)
        # Polyline for the right arm.
        draw.line(
            [keypoints['right_shoulder'], keypoints['right_elbow'], keypoints['right_wrist']],
            fill='blue',
            width=line_width,
        )

    return image_pil
62
+
63
+
64
# ---- Page header -----------------------------------------------------------
st.title('Pose Estimation App')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
st.caption(f'Using device: {device}')

# ---- Input widgets: one tab per image source --------------------------------
upload_tab, camera_tab = st.tabs(["Upload Photo", "Webcam Capture"])

with upload_tab:
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

with camera_tab:
    img_file_buffer = st.camera_input("Take a picture")

# Open whichever sources are present. The uploaded file is handled last, so
# it wins when both a webcam capture and an upload exist.
img = None
for image_source in (img_file_buffer, uploaded_file):
    if image_source is not None:
        img = Image.open(image_source)

# ---- Inference and rendering -------------------------------------------------
if img is not None:
    # predict
    with st.spinner('Predicting...'):
        model = load_model()
        pred = model(img)[0]
        keypoints = infer.get_keypoints(pred)

    if keypoints is None:
        st.error('No keypoints detected!')
        st.image(img, caption='Original image', use_column_width=True)
    else:
        img = draw_output(img, keypoints)
        st.image(img, caption='Predicted image', use_column_width=True)
        left_eye_angle, right_eye_angle = infer.get_eye_angles(keypoints)
        left_elbow_angle, right_elbow_angle = infer.get_elbow_angles(keypoints)
        st.write('Angles:')
        st.json({
            'left_eye_angle': left_eye_angle,
            'right_eye_angle': right_eye_angle,
            'left_elbow_angle': left_elbow_angle,
            'right_elbow_angle': right_elbow_angle,
        })
        st.write('Raw keypoints:')
        st.json(keypoints)
101
+
infer.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from ultralytics.engine.results import Results
3
+
4
+
5
# Keypoint names in the order they are paired with the rows of a person's
# keypoint tensor in get_keypoints (row 0 -> "nose", row 1 -> "left_eye", ...).
KEYPOINT_NAMES = [
    "nose",
    "left_eye",
    "right_eye",
    "left_ear",
    "right_ear",
    "left_shoulder",
    "right_shoulder",
    "left_elbow",
    "right_elbow",
    "left_wrist",
    "right_wrist",
    "left_hip",
    "right_hip",
    "left_knee",
    "right_knee",
    "left_ankle",
    "right_ankle",
]
8
+
9
+
10
def get_keypoints(result: Results, score_threshold: float = 0.5):
    """Return named keypoints for the first person detection in ``result``.

    Walks ``result.boxes`` in order and uses the first box whose class is 0
    (the person class). That box's row in ``result.keypoints.data`` is read as
    columns ``x``, ``y``, ``score``.

    Args:
        result: A single prediction result from the pose model.
        score_threshold: A keypoint is kept only when its score is strictly
            greater than this value. Defaults to 0.5.

    Returns:
        A dict mapping each name in ``KEYPOINT_NAMES`` to an ``(x, y)`` tuple,
        or to ``None`` when that keypoint's score is not above the threshold.
        Returns ``None`` when no box of class 0 is present.
    """
    for index, box in enumerate(result.boxes):
        if box.cls != 0.:  # Only consider the person class
            continue
        person = result.keypoints.data[index]
        xs = person[:, 0].tolist()
        ys = person[:, 1].tolist()
        visible = (person[:, 2] > score_threshold).tolist()
        # Only the first matching detection is used.
        return {
            name: (x, y) if is_visible else None
            for name, x, y, is_visible in zip(KEYPOINT_NAMES, xs, ys, visible)
        }
    return None
25
+
26
+
27
def calculate_angle(p1, p2, p3):
    """Return the angle at ``p2`` formed by the segments p2->p1 and p2->p3.

    Args:
        p1, p2, p3: Points indexable as ``[0]`` (x) and ``[1]`` (y).

    Returns:
        The angle in degrees, in the range [0, 180]. NaN when either segment
        has zero length, because the angle is undefined in that case.
    """
    v1 = np.array([p1[0] - p2[0], p1[1] - p2[1]])
    v2 = np.array([p3[0] - p2[0], p3[1] - p2[1]])
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # Degenerate input (a point coincides with the vertex): return NaN
        # explicitly instead of dividing by zero.
        return np.float64(np.nan)
    # Rounding can push the cosine marginally outside [-1, 1] for (nearly)
    # collinear points, which would make arccos return NaN; clamp it first.
    cosine = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cosine))
33
+
34
+
35
def calculate_angle_to_horizontal(vector):
    """Return the signed angle, in degrees, between ``vector`` and the x-axis.

    The raw ``arctan2`` angle is folded into [-90, 90] so that a vector and its
    mirror image across the y-axis give the same magnitude, then negated.

    Args:
        vector: Indexable with x at ``[0]`` and y at ``[1]``.

    Returns:
        The negated, folded angle in degrees.
    """
    dx, dy = vector[0], vector[1]
    degrees = np.degrees(np.arctan2(dy, dx))
    # Fold angles outside [-90, 90] back into that range.
    if degrees > 90:
        return -(180 - degrees)
    if degrees < -90:
        return -(-180 - degrees)
    return -degrees
44
+
45
+
46
def extend_line(start, end, extend_factor=3):
    """Return the point reached by continuing the start->end segment past ``end``.

    The new point lies ``extend_factor`` segment-lengths beyond ``end`` along
    the start->end direction.

    Args:
        start: ``(x, y)`` start of the segment.
        end: ``(x, y)`` end of the segment.
        extend_factor: Multiple of the segment length to go past ``end``.

    Returns:
        The extended point as an ``(x, y)`` tuple of floats. When ``start``
        equals ``end`` the direction is undefined and ``end`` is returned.
    """
    start_pt = np.asarray(start, dtype=float)
    end_pt = np.asarray(end, dtype=float)
    # unit_vector * length is simply the segment vector itself, so scale it
    # directly; this also avoids a division by zero for zero-length segments.
    new_point = (end_pt + (end_pt - start_pt) * extend_factor).tolist()
    return (new_point[0], new_point[1])
53
+
54
+
55
def get_elbow_angles(keypoints: dict):
    """Compute the left and right elbow angles from named keypoints.

    An arm's angle is computed with ``calculate_angle`` only when its
    shoulder, elbow and wrist keypoints are all truthy; otherwise that arm's
    angle is ``None``.

    Args:
        keypoints: Mapping of keypoint name to an ``(x, y)`` tuple or ``None``.

    Returns:
        A ``(left_elbow_angle, right_elbow_angle)`` tuple.
    """
    arms = (
        ('left_shoulder', 'left_elbow', 'left_wrist'),
        ('right_shoulder', 'right_elbow', 'right_wrist'),
    )
    angles = []
    for shoulder_key, elbow_key, wrist_key in arms:
        if not (keypoints[shoulder_key] and keypoints[elbow_key] and keypoints[wrist_key]):
            angles.append(None)
            continue
        angles.append(
            calculate_angle(keypoints[shoulder_key], keypoints[elbow_key], keypoints[wrist_key])
        )
    left_elbow_angle, right_elbow_angle = angles
    return left_elbow_angle, right_elbow_angle
63
+
64
+
65
def get_eye_angles(keypoints: dict):
    """Compute the ear->eye angle to the horizontal for each side of the head.

    A side's angle is computed with ``calculate_angle_to_horizontal`` on the
    vector from the ear to the eye, and only when both keypoints are truthy;
    otherwise that side's angle is ``None``.

    Args:
        keypoints: Mapping of keypoint name to an ``(x, y)`` tuple or ``None``.

    Returns:
        A ``(left_eye_angle, right_eye_angle)`` tuple.
    """
    sides = (
        ('left_ear', 'left_eye'),
        ('right_ear', 'right_eye'),
    )
    angles = []
    for ear_key, eye_key in sides:
        if not (keypoints[ear_key] and keypoints[eye_key]):
            angles.append(None)
            continue
        ear_to_eye = (
            keypoints[eye_key][0] - keypoints[ear_key][0],
            keypoints[eye_key][1] - keypoints[ear_key][1],
        )
        angles.append(calculate_angle_to_horizontal(ear_to_eye))
    left_eye_angle, right_eye_angle = angles
    return left_eye_angle, right_eye_angle
yolov8-test.ipynb ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from ultralytics import YOLO\n",
10
+ "import torch\n",
11
+ "from PIL import Image, ImageDraw, ImageFont\n",
12
+ "import numpy as np\n",
13
+ "import infer"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": null,
19
+ "metadata": {},
20
+ "outputs": [],
21
+ "source": [
22
+ "from importlib import reload\n",
23
+ "reload(infer)"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
33
+ "model_pose = YOLO('yolov8l-pose.pt')\n",
34
+ "model_pose.to(device)\n",
35
+ "\n",
36
+ "model_det = YOLO('yolov8m.pt')\n",
37
+ "model_det.to(device);"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "url = \"image.jpg\"\n",
47
+ "results = model_pose(url)\n",
48
+ "results_det = model_det(url)"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": null,
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "def draw_output(image_pil: Image.Image, keypoints: dict): \n",
58
+ " draw = ImageDraw.Draw(image_pil)\n",
59
+ " line_width = 10\n",
60
+ " font = ImageFont.truetype(\"DejaVuSerif-Bold.ttf\", 70)\n",
61
+ " \n",
62
+ " ear, eye = None, None\n",
63
+ " if keypoints[\"left_ear\"] and keypoints[\"left_eye\"]:\n",
64
+ " ear = keypoints[\"left_ear\"]\n",
65
+ " eye = keypoints[\"left_eye\"]\n",
66
+ " elif keypoints[\"right_ear\"] and keypoints[\"right_eye\"]:\n",
67
+ " ear = keypoints[\"right_ear\"]\n",
68
+ " eye = keypoints[\"right_eye\"]\n",
69
+ " \n",
70
+ " # draw extended left and right eye lines\n",
71
+ " if ear and eye:\n",
72
+ " left_new_point = infer.extend_line(ear, eye, 3)\n",
73
+ " l1 = [ear, left_new_point]\n",
74
+ " draw.line(l1, fill='red', width=line_width)\n",
75
+ " # draw a horizontal line from ear forwards\n",
76
+ " ear = np.array(ear)\n",
77
+ " l1 = np.array(l1)\n",
78
+ " l1_vector = l1[1] - l1[0]\n",
79
+ " x_s = np.sign(l1_vector)[0]\n",
80
+ " length_l1 = np.linalg.norm(l1_vector)\n",
81
+ " p2 = ear + np.array([length_l1*x_s, 0])\n",
82
+ " ear = tuple(ear.tolist())\n",
83
+ " l = [ear, tuple(p2.tolist())]\n",
84
+ " draw.line(l, fill='gray', width=line_width//2)\n",
85
+ " # draw angle\n",
86
+ " angle = infer.calculate_angle_to_horizontal(l1_vector)\n",
87
+ " draw.text(ear, f'{angle:.2f}', fill='red', font=font)\n",
88
+ " print(infer.get_eye_angles(keypoints))\n",
89
+ "\n",
90
+ "\n",
91
+ " # draw elbow angles\n",
92
+ " left_elbow_angle, right_elbow_angle = infer.get_elbow_angles(keypoints)\n",
93
+ " if left_elbow_angle:\n",
94
+ " draw.text(keypoints['left_elbow'], f'{left_elbow_angle:.2f}', fill='red', font=font)\n",
95
+ " # draw polyline for left arm\n",
96
+ " draw.line([keypoints['left_shoulder'], keypoints['left_elbow'], keypoints['left_wrist']], fill='blue', width=line_width)\n",
97
+ " if right_elbow_angle:\n",
98
+ " draw.text(keypoints['right_elbow'], f'{right_elbow_angle:.2f}', fill='red', font=font)\n",
99
+ " # draw polyline for right arm\n",
100
+ " draw.line([keypoints['right_shoulder'], keypoints['right_elbow'], keypoints['right_wrist']], fill='blue', width=line_width)\n",
101
+ "\n",
102
+ " return image_pil"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": null,
108
+ "metadata": {},
109
+ "outputs": [],
110
+ "source": [
111
+ "keypoints = infer.get_keypoints(results[0])\n",
112
+ "img = Image.open(url)\n",
113
+ "img = draw_output(img, keypoints)\n",
114
+ "img.resize((800, 800))"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": null,
120
+ "metadata": {},
121
+ "outputs": [],
122
+ "source": []
123
+ }
124
+ ],
125
+ "metadata": {
126
+ "kernelspec": {
127
+ "display_name": "Python 3",
128
+ "language": "python",
129
+ "name": "python3"
130
+ },
131
+ "language_info": {
132
+ "codemirror_mode": {
133
+ "name": "ipython",
134
+ "version": 3
135
+ },
136
+ "file_extension": ".py",
137
+ "mimetype": "text/x-python",
138
+ "name": "python",
139
+ "nbconvert_exporter": "python",
140
+ "pygments_lexer": "ipython3",
141
+ "version": "3.10.12"
142
+ }
143
+ },
144
+ "nbformat": 4,
145
+ "nbformat_minor": 2
146
+ }