In [242]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp


In [243]:
# MediaPipe's pose solution namespace; the cells below use its Pose class,
# its PoseLandmark enum and its POSE_CONNECTIONS constant.
mp_pose = mp.solutions.pose

# MediaPipe drawing helpers (draw_landmarks, DrawingSpec) used to render the pose
mp_drawing = mp.solutions.drawing_utils

In [244]:
def mediapipe_detection(image, model):
    """
    Run the pose model on a single BGR frame.

    Args:
        image: frame in OpenCV's BGR channel order.
        model: object exposing ``process(rgb_image)``; its return value is
            passed back to the caller untouched.

    Returns:
        (bgr_image, results): the frame converted back to BGR after inference,
        and whatever ``model.process`` returned.
    """
    # The model is fed RGB, so swap the channel order first.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only for the duration of the inference call
    # (presumably so the model can use the buffer without copying it).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Back to BGR so the OpenCV drawing/display calls see the expected order.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [245]:
def draw_landmarks(image, results):
    """
    Render the detected pose (keypoints and their connections) onto the frame.

    Args:
        image: frame that ``mp_drawing.draw_landmarks`` draws on.
        results: pose-estimation output; its ``pose_landmarks`` attribute is
            forwarded to the drawing helper together with
            ``mp_pose.POSE_CONNECTIONS``.
    """
    # Two drawing specs, passed positionally in the same order as before.
    first_spec = mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2)
    second_spec = mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)

    mp_drawing.draw_landmarks(
        image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        first_spec,
        second_spec,
    )

In [246]:
def draw_detection(image, results):
    """
    Draw an axis-aligned bounding box around the detected pose and report it.

    Args:
        image: frame to draw on. Only the first two entries of ``image.shape``
            (height, width) are read here; the rectangle itself is drawn by
            ``cv2.rectangle``.
        results: pose-estimation output. ``results.pose_landmarks.landmark`` is
            iterated for objects with ``x`` / ``y`` attributes, which are
            scaled by the frame width / height to get pixel coordinates.

    Returns:
        ``([[x_min, y_min], [x_max, y_max]], [center_x, center_y])`` in pixels.
        When there are no pose landmarks nothing is drawn and the placeholder
        ``([[w, h], [0, 0]], [w // 2, h // 2])`` is returned.
    """
    h, w = image.shape[:2]

    # Explicit "nothing detected" path instead of relying on a swallowed
    # AttributeError; the placeholder values match the previous behaviour.
    pose_landmarks = getattr(results, "pose_landmarks", None)
    if pose_landmarks is None:
        return [[w, h], [0, 0]], [w // 2, h // 2]

    pixels = [(int(lm.x * w), int(lm.y * h)) for lm in pose_landmarks.landmark]
    xs = [px for px, _ in pixels]
    ys = [py for _, py in pixels]

    # The frame extents seed the search (w/h for the minima, 0 for the maxima),
    # exactly like the original running min/max initial values.
    cx_min, cx_max = min([w, *xs]), max([0, *xs])
    cy_min, cy_max = min([h, *ys]), max([0, *ys])

    box_w, box_h = cx_max - cx_min, cy_max - cy_min
    center = [cx_min + (box_w // 2), cy_min + (box_h // 2)]

    cv2.rectangle(image, (cx_min, cy_min), (cx_max, cy_max), (255, 255, 0), 2)

    return [[cx_min, cy_min], [cx_max, cy_max]], center

In [247]:
def normalize(image, results, bounding_box, landmark_names):
    """
    Express each pose landmark relative to the pose bounding box.

    Args:
        image: frame whose ``shape`` gives (height, width, ...); used to turn
            the landmarks' ``x`` / ``y`` values into pixel coordinates.
        results: pose-estimation output; ``results.pose_landmarks.landmark`` is
            iterated for objects with ``x`` / ``y`` attributes.
        bounding_box: ``[[x_min, y_min], [x_max, y_max]]`` in pixels.
        landmark_names: names paired positionally with the landmarks, so they
            must be in the same order as ``results.pose_landmarks.landmark``.

    Returns:
        dict mapping each landmark name to ``[x_norm, y_norm]``, where the
        top-left box corner is ``[0, 0]`` and the bottom-right is ``[1, 1]``.
        If nothing was detected every name maps to ``[0, 0]``.
    """
    h, w = image.shape[:2]

    if not results.pose_landmarks:
        # One fresh [0, 0] pair per landmark. The old `[0, 0] * 33` was a flat
        # list of zeros, which mapped every name to the scalar 0 instead.
        return {name: [0, 0] for name in landmark_names}

    x_min, y_min = bounding_box[0][0], bounding_box[0][1]
    box_w = bounding_box[1][0] - x_min
    box_h = bounding_box[1][1] - y_min

    xy_norm = {}
    # zip pairs names with landmarks and stops at the shorter sequence.
    for name, lm in zip(landmark_names, results.pose_landmarks.landmark):
        # A degenerate (zero-extent) box would divide by zero; report 0.0 on
        # that axis instead of crashing the capture loop.
        x_norm = (lm.x * w - x_min) / box_w if box_w else 0.0
        y_norm = (lm.y * h - y_min) / box_h if box_h else 0.0
        xy_norm[name] = [x_norm, y_norm]

    return xy_norm

In [248]:
def get_coordinates(landmarks, mp_pose, side, joint):
    """
    Look up the x and y values of one named keypoint.

    Args:
        landmarks: indexable sequence of keypoints, each exposing ``x`` and ``y``.
        mp_pose: object whose ``PoseLandmark`` attribute maps names such as
            ``LEFT_WRIST`` to members carrying the keypoint index in ``.value``.
        side: 'left' or 'right' (case-insensitive); the body side of the keypoint.
        joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'
            (case-insensitive); the joint the keypoint belongs to.

    Returns:
        ``[x, y]`` exactly as stored on the selected keypoint.
    """
    landmark_name = f"{side.upper()}_{joint.upper()}"
    landmark_index = getattr(mp_pose.PoseLandmark, landmark_name).value
    keypoint = landmarks[landmark_index]
    return [keypoint.x, keypoint.y]

In [249]:
def viz_coords(image, norm_coords, landmarks, mp_pose, side, joint):
    """
    Write a joint's raw and box-normalized coordinates next to it on the frame.

    Two labels are drawn with ``cv2.putText``: the landmark's own ``x y``
    values in white at the joint position, and the normalized ``x y`` values
    in red 20 px further right and down.

    Args:
        image: frame to annotate; its height and width scale the landmark
            position into pixels.
        norm_coords: dict mapping landmark names such as ``LEFT_WRIST`` to an
            ``[x, y]`` pair.
        landmarks: sequence of keypoints handed to ``get_coordinates``.
        mp_pose: object providing ``PoseLandmark``, handed to ``get_coordinates``.
        side: 'left' or 'right'.
        joint: joint name, e.g. 'wrist'.

    Returns:
        None. The overlay is best-effort: if the joint cannot be looked up
        (no landmarks, unknown name, malformed coordinates) nothing is drawn.
    """
    point = side.upper() + "_" + joint.upper()
    try:
        joint_norm = norm_coords[point]
        joint_xy = get_coordinates(landmarks, mp_pose, side, joint)

        # Join the formatted numbers themselves. Joining str(list) put a space
        # between every character of the list's repr.
        coords_label = ' '.join('%.2f' % value for value in joint_xy)
        norm_label = ' '.join('%.2f' % value for value in joint_norm)

        # Scale by the actual frame size rather than assuming 640x480, and use
        # plain Python ints for the text origin.
        frame_h, frame_w = image.shape[:2]
        anchor_x = int(joint_xy[0] * frame_w)
        anchor_y = int(joint_xy[1] * frame_h)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # Only lookup/formatting problems are treated as "nothing to show";
        # errors raised by the drawing calls below are no longer hidden.
        return

    cv2.putText(image, coords_label,
                (anchor_x, anchor_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
                )
    cv2.putText(image, norm_label,
                (anchor_x + 20, anchor_y + 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 2, cv2.LINE_AA
                )

In [250]:
cap = cv2.VideoCapture(0)  # camera object
HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # webcam video frame height
WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # webcam video frame width
FPS = int(cap.get(cv2.CAP_PROP_FPS))  # webcam video frame rate

# Landmark names in landmark-index order. Iterating the enum yields members in
# definition order, which is what normalize() needs to pair names with
# landmarks positionally. dir(...)[:-4] returned the names sorted
# alphabetically and depended on how many extra attributes dir() lists.
landmark_names = [landmark.name for landmark in mp_pose.PoseLandmark]

# Set and test mediapipe model using webcam
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5, enable_segmentation=True) as pose:
        while cap.isOpened():

            # Read feed; stop if the camera did not deliver a frame
            ret, frame = cap.read()
            if not ret:
                break

            # Make detection
            image, results = mediapipe_detection(frame, pose)

            # Extract landmarks; None when no pose was found in this frame, so
            # a previous frame's landmarks are never reused
            landmarks = results.pose_landmarks.landmark if results.pose_landmarks else None

            # draw bounding box
            bounding_box, box_center = draw_detection(image, results)

            # Render detections
            draw_landmarks(image, results)

            # normalize coordinates
            xy_norm = normalize(image, results, bounding_box, landmark_names)
            if landmarks is not None:
                viz_coords(image, xy_norm, landmarks, mp_pose, 'left', 'wrist')
                viz_coords(image, xy_norm, landmarks, mp_pose, 'right', 'wrist')

            # Draw segmentation on the image (disabled; must run before imshow to be visible).
            # To improve segmentation around boundaries, consider applying a joint
            # bilateral filter to "results.segmentation_mask" with "image".
            # tightness = 0.3 # Probability threshold in [0, 1] that says how "tight" to make the segmentation. Greater value => tighter.
            # condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > tightness
            # bg_image = np.zeros(image.shape, dtype=np.uint8)
            # bg_image[:] = (192, 192, 192) # gray
            # image = np.where(condition, image, bg_image)

            # Display frame on screen
            cv2.imshow('OpenCV Feed', image)

            # Exit / break out logic
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()

In [251]:
# Manual cleanup cell: repeats the release/destroy calls made by the capture
# cell, presumably for when that cell is interrupted before its own cleanup runs.
cap.release()
cv2.destroyAllWindows()