import cv2
import mediapipe as mp
import numpy as np

# Load the correct and incorrect posture reference images and convert them from BGR to RGB
correct = cv2.imread('right.png')
correct = cv2.cvtColor(correct, cv2.COLOR_BGR2RGB)
incorrect = cv2.imread('wrong.png')
incorrect = cv2.cvtColor(incorrect, cv2.COLOR_BGR2RGB)

def draw_rounded_rect(img, rect_start, rect_end, corner_width, box_color):
    """
    This function draws a rectangle with rounded corners on an image.

    Args:
        img: The image to draw on.
        rect_start: The top-left corner of the rectangle as a tuple (x1, y1).
        rect_end: The bottom-right corner of the rectangle as a tuple (x2, y2).
        corner_width: The width of the rounded corners.
        box_color: The color of the rectangle in BGR format.
    """

    x1, y1 = rect_start
    x2, y2 = rect_end
    w = corner_width

    # Draw filled rectangles for each side of the box
    cv2.rectangle(img, (x1 + w, y1), (x2 - w, y1 + w), box_color, -1)
    cv2.rectangle(img, (x1 + w, y2 - w), (x2 - w, y2), box_color, -1)
    cv2.rectangle(img, (x1, y1 + w), (x1 + w, y2 - w), box_color, -1)
    cv2.rectangle(img, (x2 - w, y1 + w), (x2, y2 - w), box_color, -1)
    cv2.rectangle(img, (x1 + w, y1 + w), (x2 - w, y2 - w), box_color, -1)

    # Draw filled ellipses for the corners
    cv2.ellipse(img, (x1 + w, y1 + w), (w, w),
                angle = 0, startAngle = -90, endAngle = -180, color = box_color, thickness = -1)

    cv2.ellipse(img, (x2 - w, y1 + w), (w, w),
                angle = 0, startAngle = 0, endAngle = -90, color = box_color, thickness = -1)

    cv2.ellipse(img, (x1 + w, y2 - w), (w, w),
                angle = 0, startAngle = 90, endAngle = 180, color = box_color, thickness = -1)

    cv2.ellipse(img, (x2 - w, y2 - w), (w, w),
                angle = 0, startAngle = 0, endAngle = 90, color = box_color, thickness = -1)

    return img
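
# Example (illustrative, not part of the original utilities): draw a 15 px
# rounded green box on a blank canvas.
#   canvas = np.zeros((200, 400, 3), dtype=np.uint8)
#   canvas = draw_rounded_rect(canvas, (50, 50), (350, 150), 15, (0, 128, 0))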

def draw_dotted_line(frame, lm_coord, start, end, line_color):
    """
    This function draws a vertical dotted line on a frame at a landmark's x-coordinate.

    Args:
        frame: The image to draw on.
        lm_coord: The landmark pixel coordinates as a NumPy array (x, y); only the x-coordinate is used.
        start: The y-coordinate (in pixels) where the dotted line starts.
        end: The y-coordinate (in pixels) where the dotted line ends.
        line_color: The color of the line in BGR format.
    """

    pix_step = 0

    # Draw a small filled circle every 8 pixels between the start and end y-coordinates
    for i in range(start, end+1, 8):
        cv2.circle(frame, (lm_coord[0], i+pix_step), 2, line_color, -1, lineType=cv2.LINE_AA)

    return frame
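
# Example (illustrative): a vertical dotted reference line drawn at the
# shoulder's x-coordinate, from just below the shoulder down to the hip.
# `shldr_coord` and `hip_coord` are hypothetical pixel-coordinate NumPy arrays.
#   frame = draw_dotted_line(frame, shldr_coord,
#                            start=shldr_coord[1] + 20, end=hip_coord[1],
#                            line_color=(0, 0, 255))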

def draw_text(
    img,
    msg,
    width=7,
    font=cv2.FONT_HERSHEY_SIMPLEX,
    pos=(0, 0),
    font_scale=1,
    font_thickness=2,
    text_color=(0, 255, 0),
    text_color_bg=(0, 0, 0),
    box_offset=(20, 10),
    overlay_image=False,
    overlay_type=None
):
    """
    This function draws text with a customizable background box on an image.

    Args:
        img: The image to draw on.
        msg: The message to display as a string.
        width: The corner width of the rounded background box (default: 7).
        font: The font style for the text (default: cv2.FONT_HERSHEY_SIMPLEX).
        pos: The top-left corner coordinates of the text box (default: (0, 0)).
        font_scale: The scaling factor for the font size (default: 1).
        font_thickness: The thickness of the text (default: 2).
        text_color: The color of the text in BGR format (default: green - (0, 255, 0)).
        text_color_bg: The color of the background box in BGR format (default: black - (0, 0, 0)).
        box_offset: The offset for the background box relative to the text (default: (20, 10)).
        overlay_image: Flag to display an overlay image inside the box (default: False).
        overlay_type: Type of overlay image ("correct" or "incorrect") - used when overlay_image is True.

    Returns:
        The size of the drawn text (width, height) as returned by cv2.getTextSize.
    """

    offset = box_offset
    x, y = pos

    # Get the size of the text with the specified font and scale
    text_size, _ = cv2.getTextSize(msg, font, font_scale, font_thickness)
    text_w, text_h = text_size

    # Calculate the top-left and bottom-right corners of the text box with padding
    rec_start = tuple(p - o for p, o in zip(pos, offset))
    rec_end = tuple(m + n - o for m, n, o in zip((x + text_w, y + text_h), offset, (25, 0)))

    resize_height = 0


    # Handle overlay image logic
    if overlay_image:
        resize_height = rec_end[1] - rec_start[1]


        # Draw a rounded rectangle box with the background color
        img = draw_rounded_rect(img, rec_start, (rec_end[0]+resize_height, rec_end[1]), width, text_color_bg)
        
        # Resize the overlay image based on the box height
        if overlay_type == "correct":
            overlay_res = cv2.resize(correct, (resize_height, resize_height), interpolation = cv2.INTER_AREA)		
        elif overlay_type == "incorrect":
            overlay_res = cv2.resize(incorrect, (resize_height, resize_height), interpolation = cv2.INTER_AREA)

        # Overlay the resized image onto the background box
        img[rec_start[1]:rec_start[1]+resize_height, rec_start[0]+width:rec_start[0]+width+resize_height] = overlay_res

    else:
        img = draw_rounded_rect(img, rec_start, rec_end, width, text_color_bg)


    # Draw the text onto the image with specified parameters
    cv2.putText(
        img,
        msg,
        (int(rec_start[0]+resize_height + 8), int(y + text_h + font_scale - 1)), 
        font,
        font_scale,
        text_color,
        font_thickness,
        cv2.LINE_AA,
    )

    return text_size
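
# Example (illustrative): a status banner with the "correct" overlay icon in
# the top-left of a frame. `frame` is any BGR image; the colors are arbitrary.
#   draw_text(frame, 'CORRECT POSTURE', pos=(30, 30),
#             text_color=(255, 255, 255), text_color_bg=(18, 185, 0),
#             overlay_image=True, overlay_type='correct')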

def find_angle(p1, p2, ref_pt=np.array([0, 0])):
    """
    This function calculates the angle between two points relative to a reference point.

    Args:
        p1: The first point coordinates as a NumPy array (x, y).
        p2: The second point coordinates as a NumPy array (x, y).
        ref_pt: The reference point coordinates as a NumPy array (default: [0, 0]).

    Returns:
        The angle between the two points in degrees (int).
    """
    # Subtract the reference point from both points for normalization
    p1_ref = p1 - ref_pt
    p2_ref = p2 - ref_pt

    # Calculate the cosine of the angle using the dot product
    cos_theta = (np.dot(p1_ref,p2_ref)) / (1.0 * np.linalg.norm(p1_ref) * np.linalg.norm(p2_ref))
    
    # Clip the cosine value to avoid potential errors
    theta = np.arccos(np.clip(cos_theta, -1.0, 1.0))

    # Convert the angle from radians to degrees and cast to integer
    degree = (180 / np.pi) * theta
    return int(degree)
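
# Example (illustrative): unit vectors along the y- and x-axes, measured from
# the origin, are perpendicular, so the function returns 90.
#   find_angle(np.array([0, 1]), np.array([1, 0]))  # -> 90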

def get_landmark_array(pose_landmark, key, frame_width, frame_height):
    """
    This function converts a landmark's normalized coordinates to pixel coordinates.

    Args:
        pose_landmark: A MediaPipe pose landmark list (indexable by landmark key).
        key: The key of the landmark to extract (taken from the landmark feature dictionary).
        frame_width: The width of the image frame in pixels.
        frame_height: The height of the image frame in pixels.

    Returns:
        A NumPy array containing the x and y pixel coordinates of the landmark.
    """


    denorm_x = int(pose_landmark[key].x * frame_width)
    denorm_y = int(pose_landmark[key].y * frame_height)

    return np.array([denorm_x, denorm_y])

def get_landmark_features(kp_results, dict_features, feature, frame_width, frame_height):
    """
    This function extracts landmark pixel coordinates for a body-part feature.

    Args:
        kp_results: The MediaPipe pose landmark results object.
        dict_features: A dictionary containing landmark keys for the different body parts.
        feature: The name of the body part feature to extract ('nose', 'left' or 'right').
        frame_width: The width of the image frame.
        frame_height: The height of the image frame.

    Returns:
        The landmark coordinates as NumPy arrays; raises a ValueError for an invalid feature.
    """

    if feature == 'nose':
        return get_landmark_array(kp_results, dict_features[feature], frame_width, frame_height)

    elif feature == 'left' or feature == 'right':
        shldr_coord = get_landmark_array(kp_results, dict_features[feature]['shoulder'], frame_width, frame_height)
        elbow_coord = get_landmark_array(kp_results, dict_features[feature]['elbow'], frame_width, frame_height)
        wrist_coord = get_landmark_array(kp_results, dict_features[feature]['wrist'], frame_width, frame_height)
        hip_coord   = get_landmark_array(kp_results, dict_features[feature]['hip'], frame_width, frame_height)
        knee_coord  = get_landmark_array(kp_results, dict_features[feature]['knee'], frame_width, frame_height)
        ankle_coord = get_landmark_array(kp_results, dict_features[feature]['ankle'], frame_width, frame_height)
        foot_coord  = get_landmark_array(kp_results, dict_features[feature]['foot'], frame_width, frame_height)

        return shldr_coord, elbow_coord, wrist_coord, hip_coord, knee_coord, ankle_coord, foot_coord
    
    else:
        raise ValueError("feature needs to be either 'nose', 'left' or 'right'")

def get_mediapipe_pose(
    static_image_mode=False,
    model_complexity=1,
    smooth_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
):
    """
    This function creates a MediaPipe Pose object for human pose estimation.

    Args:
        static_image_mode: Flag for processing a single static image (default: False).
        model_complexity: Level of complexity for the pose model (default: 1).
        smooth_landmarks: Enable smoothing of detected landmarks (default: True).
        min_detection_confidence: Minimum confidence threshold for person detection (default: 0.5).
        min_tracking_confidence: Minimum confidence threshold for pose tracking (default: 0.5).

    Returns:
        A MediaPipe Pose object.
    """
    pose = mp.solutions.pose.Pose(
        static_image_mode=static_image_mode,
        model_complexity=model_complexity,
        smooth_landmarks=smooth_landmarks,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence
    )
    return pose
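
# A minimal end-to-end sketch (illustrative, not part of the original
# utilities): run pose estimation on a single image and annotate the left-side
# hip angle. The file name 'sample.jpg' and the dict_features mapping below
# are assumptions; the indices follow MediaPipe Pose's standard landmark
# numbering.
if __name__ == '__main__':
    dict_features = {
        'nose': 0,
        'left': {'shoulder': 11, 'elbow': 13, 'wrist': 15,
                 'hip': 23, 'knee': 25, 'ankle': 27, 'foot': 31},
        'right': {'shoulder': 12, 'elbow': 14, 'wrist': 16,
                  'hip': 24, 'knee': 26, 'ankle': 28, 'foot': 32},
    }

    pose = get_mediapipe_pose(static_image_mode=True)

    frame = cv2.imread('sample.jpg')
    frame_h, frame_w, _ = frame.shape

    # MediaPipe expects RGB input.
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if results.pose_landmarks:
        lm = results.pose_landmarks.landmark
        shldr, elbow, wrist, hip, knee, ankle, foot = get_landmark_features(
            lm, dict_features, 'left', frame_w, frame_h)

        # Angle of the torso at the hip, measured against the vertical:
        # [hip[0], 0] is the point straight above the hip in image coordinates.
        hip_vertical_angle = find_angle(shldr, np.array([hip[0], 0]), hip)

        draw_text(frame, f'HIP-VERTICAL ANGLE: {hip_vertical_angle}', pos=(30, 30),
                  text_color=(255, 255, 255), text_color_bg=(0, 128, 0))
        cv2.imwrite('annotated.jpg', frame)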