randomshit11 committed
Commit fe1e463 · verified · 1 Parent(s): 94d48ae

Update app.py

Files changed (1)
  1. app.py +246 -145
app.py CHANGED
@@ -1,189 +1,290 @@
import streamlit as st
import cv2
import mediapipe as mp
- import math
- from PIL import Image
import numpy as np

- ## Build and Load Model
def attention_block(inputs, time_steps):
-     """
-     Attention layer for deep neural network
-
-     """
-     # Attention weights
    a = Permute((2, 1))(inputs)
    a = Dense(time_steps, activation='softmax')(a)
-
-     # Attention vector
    a_probs = Permute((2, 1), name='attention_vec')(a)
-
-     # Luong's multiplicative score
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
-
    return output_attention_mul

@st.cache(allow_output_mutation=True)
def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
-
-     # Input
    inputs = Input(shape=(sequence_length, num_input_values))
-     # Bi-LSTM
    lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
-     # Attention
    attention_mul = attention_block(lstm_out, sequence_length)
    attention_mul = Flatten()(attention_mul)
-     # Fully Connected Layer
    x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
    x = Dropout(0.5)(x)
-     # Output
    x = Dense(num_classes, activation='softmax')(x)
-     # Bring it all together
    model = Model(inputs=[inputs], outputs=x)
-
-     ## Load Model Weights
    load_dir = "./models/LSTM_Attention.h5"
    model.load_weights(load_dir)
-
    return model
- threshold1 = st.slider("Minimum Keypoint Detection Confidence", 0.00, 1.00, 0.50)
- threshold2 = st.slider("Minimum Tracking Confidence", 0.00, 1.00, 0.50)
- threshold3 = st.slider("Minimum Activity Classification Confidence", 0.00, 1.00, 0.50)
- ## Real Time Machine Learning and Computer Vision Processes
class VideoProcessor:
    def __init__(self):
-         # Parameters
        self.actions = np.array(['curl', 'press', 'squat'])
        self.sequence_length = 30
        self.colors = [(245,117,16), (117,245,16), (16,117,245)]
-         self.threshold = 0.50 # Default threshold for activity classification confidence
-
-         # Detection variables
-         self.sequence = []
-         self.current_action = ''
-
-         # Initialize pose model
-         self.mp_pose = mp.solutions.pose
-         self.mp_drawing = mp.solutions.drawing_utils
-         self.pose = self.mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)

-     @st.cache()
-     def draw_landmarks(self, image, results):
-         """
-         This function draws keypoints and landmarks detected by the human pose estimation model
-
-         """
-         self.mp_drawing.draw_landmarks(image, results.pose_landmarks, self.mp_pose.POSE_CONNECTIONS,
-                                        self.mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
-                                        self.mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
-                                        )
-         return image
-
-     @st.cache()
-     def extract_keypoints(self, results):
-         """
-         Processes and organizes the keypoints detected from the pose estimation model
-         to be used as inputs for the exercise decoder models
-
-         """
-         pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
-         return pose
-
-     @st.cache()
-     def calculate_angle(self, a, b, c):
-         """
-         Computes 3D joint angle inferred by 3 keypoints and their relative positions to one another
-
-         """
-         a = np.array(a) # First
-         b = np.array(b) # Mid
-         c = np.array(c) # End
-
-         radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
-         angle = np.abs(radians*180.0/np.pi)
-
-         if angle > 180.0:
-             angle = 360-angle
-
-         return angle
-
-     @st.cache()
-     def get_coordinates(self, landmarks, side, joint):
-         """
-         Retrieves x and y coordinates of a particular keypoint from the pose estimation model
-
-         Args:
-             landmarks: processed keypoints from the pose estimation model
-             side: 'left' or 'right'. Denotes the side of the body of the landmark of interest.
-             joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'. Denotes which body joint is associated with the landmark of interest.
-
-         """
-         coord = getattr(self.mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
-         x_coord_val = landmarks[coord.value].x
-         y_coord_val = landmarks[coord.value].y
-         return [x_coord_val, y_coord_val]
-
-     @st.cache()
-     def viz_joint_angle(self, image, angle, joint):
-         """
-         Displays the joint angle value near the joint within the image frame
-
-         """
-         cv2.putText(image, str(int(angle)),
-                     tuple(np.multiply(joint, [640, 480]).astype(int)),
-                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
-                     )
-         return
-
-     @st.cache()
-     def process_video_input(self, threshold1, threshold2, threshold3):
-         """
-         Processes the video input and performs real-time action recognition and rep counting.
-
-         """
-         video_file = st.file_uploader("Upload Video", type=["mp4", "avi"])
-         if video_file is None:
-             st.warning("Please upload a video file.")
-             return
-
        cap = cv2.VideoCapture(video_file)
-         if not cap.isOpened():
-             st.error("Error opening video stream or file.")
-             return
-
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
-
-             # Convert frame to RGB (Mediapipe requires RGB input)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-             # Pose estimation
            results = self.pose.process(frame_rgb)
-
-             # Draw landmarks
-             self.draw_landmarks(frame, results)
-
-             # Extract keypoints
-             keypoints = self.extract_keypoints(results)
-
-             # Visualize probabilities
-             if len(self.sequence) == self.sequence_length:
-                 sequence = np.array([self.sequence])
-                 res = model.predict(sequence)
-                 frame = self.prob_viz(res[0], frame)
-
-             # Append frame to output frames
            out_frames.append(frame)
-
-         # Release video capture
        cap.release()
- # Create an instance of VideoProcessor
- video_processor = VideoProcessor()

- # Call the process_video_input method
- video_processor.process_video_input(threshold1, threshold2, threshold3)
# import streamlit as st
# import cv2
+ # import streamlit as st
+ # import cv2
+ # import mediapipe as mp
+ # import math
+ # from PIL import Image
+ # import numpy as np
+
+ # ## Build and Load Model
+ # def attention_block(inputs, time_steps):
+ # """
+ # Attention layer for deep neural network
+
+ # """
+ # # Attention weights
+ # a = Permute((2, 1))(inputs)
+ # a = Dense(time_steps, activation='softmax')(a)
+
+ # # Attention vector
+ # a_probs = Permute((2, 1), name='attention_vec')(a)
+
+ # # Luong's multiplicative score
+ # output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
+
+ # return output_attention_mul
+
+ # @st.cache(allow_output_mutation=True)
+ # def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
+
+ # # Input
+ # inputs = Input(shape=(sequence_length, num_input_values))
+ # # Bi-LSTM
+ # lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
+ # # Attention
+ # attention_mul = attention_block(lstm_out, sequence_length)
+ # attention_mul = Flatten()(attention_mul)
+ # # Fully Connected Layer
+ # x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
+ # x = Dropout(0.5)(x)
+ # # Output
+ # x = Dense(num_classes, activation='softmax')(x)
+ # # Bring it all together
+ # model = Model(inputs=[inputs], outputs=x)
+
+ # ## Load Model Weights
+ # load_dir = "./models/LSTM_Attention.h5"
+ # model.load_weights(load_dir)
+
+ # return model
+ # threshold1 = st.slider("Minimum Keypoint Detection Confidence", 0.00, 1.00, 0.50)
+ # threshold2 = st.slider("Minimum Tracking Confidence", 0.00, 1.00, 0.50)
+ # threshold3 = st.slider("Minimum Activity Classification Confidence", 0.00, 1.00, 0.50)
+ # ## Real Time Machine Learning and Computer Vision Processes
+ # class VideoProcessor:
+ # def __init__(self):
+ # # Parameters
+ # self.actions = np.array(['curl', 'press', 'squat'])
+ # self.sequence_length = 30
+ # self.colors = [(245,117,16), (117,245,16), (16,117,245)]
+ # self.threshold = 0.50 # Default threshold for activity classification confidence
+
+ # # Detection variables
+ # self.sequence = []
+ # self.current_action = ''
+
+ # # Initialize pose model
+ # self.mp_pose = mp.solutions.pose
+ # self.mp_drawing = mp.solutions.drawing_utils
+ # self.pose = self.mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+
+ # @st.cache()
+ # def draw_landmarks(self, image, results):
+ # """
+ # This function draws keypoints and landmarks detected by the human pose estimation model
+
+ # """
+ # self.mp_drawing.draw_landmarks(image, results.pose_landmarks, self.mp_pose.POSE_CONNECTIONS,
+ # self.mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
+ # self.mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
+ # )
+ # return image
+
+ # @st.cache()
+ # def extract_keypoints(self, results):
+ # """
+ # Processes and organizes the keypoints detected from the pose estimation model
+ # to be used as inputs for the exercise decoder models
+
+ # """
+ # pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
+ # return pose
+
+ # @st.cache()
+ # def calculate_angle(self, a, b, c):
+ # """
+ # Computes 3D joint angle inferred by 3 keypoints and their relative positions to one another
+
+ # """
+ # a = np.array(a) # First
+ # b = np.array(b) # Mid
+ # c = np.array(c) # End
+
+ # radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
+ # angle = np.abs(radians*180.0/np.pi)
+
+ # if angle > 180.0:
+ # angle = 360-angle
+
+ # return angle
+
+ # @st.cache()
+ # def get_coordinates(self, landmarks, side, joint):
+ # """
+ # Retrieves x and y coordinates of a particular keypoint from the pose estimation model
+
+ # Args:
+ # landmarks: processed keypoints from the pose estimation model
+ # side: 'left' or 'right'. Denotes the side of the body of the landmark of interest.
+ # joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'. Denotes which body joint is associated with the landmark of interest.
+
+ # """
+ # coord = getattr(self.mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
+ # x_coord_val = landmarks[coord.value].x
+ # y_coord_val = landmarks[coord.value].y
+ # return [x_coord_val, y_coord_val]
+
+ # @st.cache()
+ # def viz_joint_angle(self, image, angle, joint):
+ # """
+ # Displays the joint angle value near the joint within the image frame
+
+ # """
+ # cv2.putText(image, str(int(angle)),
+ # tuple(np.multiply(joint, [640, 480]).astype(int)),
+ # cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
+ # )
+ # return
+
+ # @st.cache()
+ # def process_video_input(self, threshold1, threshold2, threshold3):
+ # """
+ # Processes the video input and performs real-time action recognition and rep counting.
+
+ # """
+ # video_file = st.file_uploader("Upload Video", type=["mp4", "avi"])
+ # if video_file is None:
+ # st.warning("Please upload a video file.")
+ # return
+
+ # cap = cv2.VideoCapture(video_file)
+ # if not cap.isOpened():
+ # st.error("Error opening video stream or file.")
+ # return
+
+ # while cap.isOpened():
+ # ret, frame = cap.read()
+ # if not ret:
+ # break
+
+ # # Convert frame to RGB (Mediapipe requires RGB input)
+ # frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+ # # Pose estimation
+ # results = self.pose.process(frame_rgb)
+
+ # # Draw landmarks
+ # self.draw_landmarks(frame, results)
+
+ # # Extract keypoints
+ # keypoints = self.extract_keypoints(results)
+
+ # # Visualize probabilities
+ # if len(self.sequence) == self.sequence_length:
+ # sequence = np.array([self.sequence])
+ # res = model.predict(sequence)
+ # frame = self.prob_viz(res[0], frame)
+
+ # # Append frame to output frames
+ # out_frames.append(frame)
+
+ # # Release video capture
+ # cap.release()
+ # # # Create an instance of VideoProcessor
+ # # video_processor = VideoProcessor()
+
+ # # # Call the process_video_input method
+ # # video_processor.process_video_input(threshold1, threshold2, threshold3)
+
+ # # Define Streamlit app
+ # def main():
+ # st.title("Real-time Exercise Detection")
+ # video_file = st.file_uploader("Upload a video file", type=["mp4", "avi"])
+ # if video_file is not None:
+ # st.video(video_file)
+ # video_processor = VideoProcessor()
+ # frames = video_processor.process_video(video_file)
+ # for frame in frames:
+ # st.image(frame, channels="BGR")
+
+ # if __name__ == "__main__":
+ # main()
+
+
+
import streamlit as st
import cv2
import mediapipe as mp
import numpy as np
+ import math
+ from tensorflow.keras.models import Model
+ from tensorflow.keras.layers import (LSTM, Dense, Dropout, Input, Flatten,
+                                      Bidirectional, Permute, multiply)

+ # Load the pose estimation model from Mediapipe
+ mp_pose = mp.solutions.pose
+ mp_drawing = mp.solutions.drawing_utils
+ pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+
+ # Define the attention block for the LSTM model
def attention_block(inputs, time_steps):
    a = Permute((2, 1))(inputs)
    a = Dense(time_steps, activation='softmax')(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul

+ # Build and load the LSTM model
@st.cache(allow_output_mutation=True)
def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
    inputs = Input(shape=(sequence_length, num_input_values))
    lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
    attention_mul = attention_block(lstm_out, sequence_length)
    attention_mul = Flatten()(attention_mul)
    x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
    x = Dropout(0.5)(x)
    x = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=[inputs], outputs=x)
    load_dir = "./models/LSTM_Attention.h5"
    model.load_weights(load_dir)
    return model
+
+ # Define the VideoProcessor class for real-time video processing
class VideoProcessor:
    def __init__(self):
        self.actions = np.array(['curl', 'press', 'squat'])
        self.sequence_length = 30
        self.colors = [(245,117,16), (117,245,16), (16,117,245)]
+         self.pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+         self.model = build_model()

+     def process_video(self, video_file):
        cap = cv2.VideoCapture(video_file)
+         out_frames = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = self.pose.process(frame_rgb)
+             frame = self.draw_landmarks(frame, results)
            out_frames.append(frame)
        cap.release()
+         return out_frames
+
+     def draw_landmarks(self, image, results):
+         mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
+                                   mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
+                                   mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))
+         return image
+
+ # Define Streamlit app
+ def main():
+     st.title("Real-time Exercise Detection")
+     video_file = st.file_uploader("Upload a video file", type=["mp4", "avi"])
+     if video_file is not None:
+         st.video(video_file)
+         video_processor = VideoProcessor()
+         frames = video_processor.process_video(video_file)
+         for frame in frames:
+             st.image(frame, channels="BGR")
+
+ if __name__ == "__main__":
+     main()
+
+
+
+

# import streamlit as st
# import cv2
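One practical note on the committed `process_video` path: `st.file_uploader` returns an in-memory `UploadedFile`, while `cv2.VideoCapture` expects a file path or device index, so the upload will generally not open directly as written. A minimal sketch of a common workaround, assuming a temporary file is acceptable (the `tmp_path` variable and `.mp4` suffix are illustrative and not part of this commit):

    import tempfile

    import cv2
    import streamlit as st

    video_file = st.file_uploader("Upload a video file", type=["mp4", "avi"])
    if video_file is not None:
        # Write the uploaded bytes to disk so OpenCV can open the video by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp.write(video_file.read())
            tmp_path = tmp.name
        cap = cv2.VideoCapture(tmp_path)  # illustrative bridge, not in the committed code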