randomshit11 committed
Commit 1d15de2 · verified · 1 Parent(s): 54b36c1

Update app.py

Files changed (1):
  1. app.py +128 -68
app.py CHANGED
@@ -73,100 +73,74 @@
  # mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))
  # return image
 
- import os
- import streamlit as st
- import cv2
- import mediapipe as mp
- import numpy as np
- import math
- from tensorflow.keras.models import Model
- from tensorflow.keras.layers import (LSTM, Dense, Dropout, Input, Flatten,
-                                      Bidirectional, Permute, multiply)
-
- # Load the pose estimation model from Mediapipe
- mp_pose = mp.solutions.pose
- mp_drawing = mp.solutions.drawing_utils
- pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
-
- # Define the attention block for the LSTM model
- def attention_block(inputs, time_steps):
-     a = Permute((2, 1))(inputs)
-     a = Dense(time_steps, activation='softmax')(a)
-     a_probs = Permute((2, 1), name='attention_vec')(a)
-     output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
-     return output_attention_mul
-
- # Build and load the LSTM model
- @st.cache(allow_output_mutation=True)
- def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
-     inputs = Input(shape=(sequence_length, num_input_values))
-     lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
-     attention_mul = attention_block(lstm_out, sequence_length)
-     attention_mul = Flatten()(attention_mul)
-     x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
-     x = Dropout(0.5)(x)
-     x = Dense(num_classes, activation='softmax')(x)
-     model = Model(inputs=[inputs], outputs=x)
-     load_dir = "./models/LSTM_Attention.h5"
-     model.load_weights(load_dir)
-     return model
-
- # Define the VideoProcessor class for real-time video processing
  class VideoProcessor:
      def __init__(self):
          self.actions = np.array(['curl', 'press', 'squat'])
          self.sequence_length = 30
          self.colors = [(245,117,16), (117,245,16), (16,117,245)]
-         self.pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
-         self.model = build_model()
-
-     def process_video(self, video_file):
-         # Get the filename from the file object
-         filename = video_file.name
-         # Create a temporary file to write the contents of the uploaded video file
-         temp_file = open(filename, 'wb')
-         temp_file.write(video_file.read())
-         temp_file.close()
-         # Now we can open the video file using cv2.VideoCapture()
-         cap = cv2.VideoCapture(filename)
-         out_frames = []
-         while cap.isOpened():
-             ret, frame = cap.read()
-             if not ret:
-                 break
-             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-             results = self.pose.process(frame_rgb)
-             frame = self.draw_landmarks(frame, results)
-             out_frames.append(frame)
-         cap.release()
-         # Remove the temporary file
-         os.remove(filename)
-         return out_frames
 
      def draw_landmarks(self, image, results):
-         mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
-                                   mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
-                                   mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))
          return image
-
      @st.cache()
      def extract_keypoints(self, results):
          pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
          return pose
 
      @st.cache()
      def calculate_angle(self, a, b, c):
          a = np.array(a)  # First
          b = np.array(b)  # Mid
          c = np.array(c)  # End
          radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
          angle = np.abs(radians*180.0/np.pi)
          if angle > 180.0:
              angle = 360-angle
          return angle
 
      @st.cache()
      def get_coordinates(self, landmarks, side, joint):
          coord = getattr(self.mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
          x_coord_val = landmarks[coord.value].x
          y_coord_val = landmarks[coord.value].y
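For reference, the attention-BiLSTM builder deleted above is still what the new VideoProcessor loads through build_model(): a bidirectional LSTM over a 30-step window of 33*4 pose keypoint values, a softmax attention layer over the time steps, and a 3-way softmax over ['curl', 'press', 'squat']. A minimal sketch to sanity-check those shapes, assuming TensorFlow is installed and ./models/LSTM_Attention.h5 is present (the dummy input below is not from the app):

    import numpy as np

    model = build_model()                                        # loads ./models/LSTM_Attention.h5
    dummy_window = np.zeros((1, 30, 33 * 4), dtype=np.float32)   # one 30-frame sequence of 132 keypoint values
    probs = model.predict(dummy_window, verbose=0)[0]            # softmax over ['curl', 'press', 'squat']
    print(probs.shape)                                           # (3,)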
@@ -174,12 +148,98 @@ class VideoProcessor:
 
      @st.cache()
      def viz_joint_angle(self, image, angle, joint):
          cv2.putText(image, str(int(angle)),
                      tuple(np.multiply(joint, [640, 480]).astype(int)),
                      cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
                      )
          return
 
  # Define Streamlit app
  def main():
      st.title("Real-time Exercise Detection")
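viz_joint_angle assumes a 640x480 frame: MediaPipe landmark coordinates are normalized to [0, 1], so the joint position is scaled by [640, 480] and cast to int before cv2.putText can use it as a pixel anchor. A small sketch of that conversion with a made-up landmark value:

    import numpy as np

    joint = (0.25, 0.5)                                          # normalized (x, y) of a landmark (made-up)
    anchor = tuple(np.multiply(joint, [640, 480]).astype(int))   # pixel coordinates for cv2.putText
    print(anchor)                                                # x=160, y=240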
 
  # mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))
  # return image
 
  class VideoProcessor:
      def __init__(self):
          self.actions = np.array(['curl', 'press', 'squat'])
          self.sequence_length = 30
          self.colors = [(245,117,16), (117,245,16), (16,117,245)]
+         self.threshold = 0.50  # Default threshold for activity classification confidence
+
+         # Detection variables
+         self.sequence = []
+         self.current_action = ''
+
+         # Initialize pose model
+         self.mp_pose = mp.solutions.pose
+         self.mp_drawing = mp.solutions.drawing_utils
+         self.pose = self.mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+         self.model = build_model()  # Load the LSTM model
 
+     @st.cache()
      def draw_landmarks(self, image, results):
+         """
+         This function draws keypoints and landmarks detected by the human pose estimation model
+         """
+         self.mp_drawing.draw_landmarks(image, results.pose_landmarks, self.mp_pose.POSE_CONNECTIONS,
+                                        self.mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
+                                        self.mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
+                                        )
          return image
+
      @st.cache()
      def extract_keypoints(self, results):
+         """
+         Processes and organizes the keypoints detected from the pose estimation model
+         to be used as inputs for the exercise decoder models
+         """
          pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
          return pose
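extract_keypoints flattens the 33 MediaPipe pose landmarks into one 132-value vector (x, y, z, visibility per landmark) and falls back to zeros when no pose is detected; sequence_length of these vectors form one model input. A quick shape check using the zero fallback (the stacking below is illustrative, not app code):

    import numpy as np

    frame_vector = np.zeros(33 * 4)           # fallback when results.pose_landmarks is None
    window = np.stack([frame_vector] * 30)    # 30 consecutive frame vectors, as fed to the LSTM
    print(frame_vector.shape, window.shape)   # (132,) (30, 132)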
 
      @st.cache()
      def calculate_angle(self, a, b, c):
+         """
+         Computes 3D joint angle inferred by 3 keypoints and their relative positions to one another
+         """
          a = np.array(a)  # First
          b = np.array(b)  # Mid
          c = np.array(c)  # End
+
          radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
          angle = np.abs(radians*180.0/np.pi)
+
          if angle > 180.0:
              angle = 360-angle
+
          return angle
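calculate_angle uses only the x and y components, so the angle is measured in the image plane: it takes the difference of the two atan2 headings around the middle keypoint b and folds the result into [0, 180] degrees. A worked example with hypothetical keypoints where the joint forms a right angle:

    import numpy as np

    a, b, c = (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)   # e.g. shoulder, elbow, wrist (made-up coordinates)
    radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
    angle = np.abs(radians * 180.0 / np.pi)
    if angle > 180.0:
        angle = 360 - angle
    print(angle)                                   # 90.0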
 
      @st.cache()
      def get_coordinates(self, landmarks, side, joint):
+         """
+         Retrieves x and y coordinates of a particular keypoint from the pose estimation model
+
+         Args:
+             landmarks: processed keypoints from the pose estimation model
+             side: 'left' or 'right'. Denotes the side of the body of the landmark of interest.
+             joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'. Denotes which body joint is associated with the landmark of interest.
+         """
          coord = getattr(self.mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
          x_coord_val = landmarks[coord.value].x
          y_coord_val = landmarks[coord.value].y
 
      @st.cache()
      def viz_joint_angle(self, image, angle, joint):
+         """
+         Displays the joint angle value near the joint within the image frame
+         """
          cv2.putText(image, str(int(angle)),
                      tuple(np.multiply(joint, [640, 480]).astype(int)),
                      cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
                      )
          return
 
+     @st.cache()
+     def process(self, image):
+         """
+         Function to process the video frame from the user's webcam and run the fitness trainer AI
+
+         Args:
+             image (numpy array): input image from the webcam
+
+         Returns:
+             numpy array: processed image with keypoint detection and fitness activity classification visualized
+         """
+         # Pose detection model
+         image.flags.writeable = False
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         results = pose.process(image)
+
+         # Draw the hand annotations on the image.
+         image.flags.writeable = True
+         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+         self.draw_landmarks(image, results)
+
+         # Prediction logic
+         keypoints = self.extract_keypoints(results)
+         self.sequence.append(keypoints.astype('float32', casting='same_kind'))
+         self.sequence = self.sequence[-self.sequence_length:]
+
+         if len(self.sequence) == self.sequence_length:
+             res = model.predict(np.expand_dims(self.sequence, axis=0), verbose=0)[0]
+             # interpreter.set_tensor(self.input_details[0]['index'], np.expand_dims(self.sequence, axis=0))
+             # interpreter.invoke()
+             # res = interpreter.get_tensor(self.output_details[0]['index'])
+
+             self.current_action = self.actions[np.argmax(res)]
+             confidence = np.max(res)
+
+             # Erase current action variable if no probability is above threshold
+             if confidence < self.threshold:
+                 self.current_action = ''
+
+             # Viz probabilities
+             image = self.prob_viz(res, image)
+
+             # Count reps
+             try:
+                 landmarks = results.pose_landmarks.landmark
+                 self.count_reps(
+                     image, landmarks, mp_pose)
+             except:
+                 pass
+
+             # Display graphical information
+             cv2.rectangle(image, (0,0), (640, 40), self.colors[np.argmax(res)], -1)
+             cv2.putText(image, 'curl ' + str(self.curl_counter), (3,30),
+                         cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
+             cv2.putText(image, 'press ' + str(self.press_counter), (240,30),
+                         cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
+             cv2.putText(image, 'squat ' + str(self.squat_counter), (490,30),
+                         cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
+
+         # return cv2.flip(image, 1)
+         return image
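The prediction logic in process keeps a rolling buffer: each frame's keypoint vector is appended and the list is then trimmed to the last sequence_length entries, so the model only ever sees the most recent 30 frames. A stripped-down sketch of that buffering with stand-in frame vectors:

    sequence_length = 30
    sequence = []
    for frame_idx in range(100):                 # pretend 100 frames have been processed
        keypoints = [frame_idx]                  # stand-in for the real 132-value keypoint vector
        sequence.append(keypoints)
        sequence = sequence[-sequence_length:]   # keep only the newest 30 frames
    print(len(sequence), sequence[0], sequence[-1])   # 30 [70] [99]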
+
+     def process_video(self, video_file):
+         # Get the filename from the file object
+         filename = video_file.name
+         # Create a temporary file to write the contents of the uploaded video file
+         temp_file = open(filename, 'wb')
+         temp_file.write(video_file.read())
+         temp_file.close()
+         # Now we can open the video file using cv2.VideoCapture()
+         cap = cv2.VideoCapture(filename)
+         out_frames = []
+         while cap.isOpened():
+             ret, frame = cap.read()
+             if not ret:
+                 break
+             frame_processed = self.process(frame)
+             out_frames.append(frame_processed)
+         cap.release()
+         # Remove the temporary file
+         os.remove(filename)
+         return out_frames
  # Define Streamlit app
  def main():
      st.title("Real-time Exercise Detection")
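The diff ends inside main() right after st.title, so the following is only a hedged sketch of how the refactored process_video could be driven from the Streamlit app; the uploader label, file types, and the st.image playback loop are assumptions, not part of this commit:

    def main():
        st.title("Real-time Exercise Detection")
        video_file = st.file_uploader("Upload a workout video", type=["mp4", "mov", "avi"])  # hypothetical widget
        if video_file is not None:
            processor = VideoProcessor()
            frames = processor.process_video(video_file)   # frames with landmarks and activity overlay drawn
            placeholder = st.empty()
            for frame in frames:
                placeholder.image(frame, channels="BGR")   # frames come back in OpenCV BGR order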