"""Extract MediaPipe face and hand landmarks from a video into a pandas DataFrame."""

import numpy as np
import pandas as pd


def generate_column_names():
    """
    Generate column names for a DataFrame that will store coordinates of landmarks.

    Column names are formatted as '{coordinate}_{landmark_type}_{landmark_index}',
    preceded by a single 'frame' column.

    Returns:
        list: A list of strings representing the column names, in the fixed order
        frame, x_face_0..467, y_face_0..467, then x/y for left_hand and
        right_hand (21 landmarks each).
    """
    columns = ['frame']
    # Mediapipe face mesh contains 468 landmarks.
    columns += [f'{coord}_face_{i}' for coord in ('x', 'y') for i in range(468)]
    # Mediapipe hand model contains 21 landmarks per hand.
    for hand in ('left_hand', 'right_hand'):
        columns += [f'{coord}_{hand}_{i}' for coord in ('x', 'y') for i in range(21)]
    return columns


def video_to_landmarks(video_path, columns):
    """
    Extract face and hand landmarks from a video and store them in a DataFrame.

    The video is processed frame by frame. For each frame, face and hand
    landmarks are detected using MediaPipe's face mesh and hand models,
    respectively. Coordinates missing in a frame are left as NaN.

    Parameters:
        video_path (str): Path to the video file.
        columns (list): List of column names for the DataFrame
            (as produced by generate_column_names()).

    Returns:
        pd.DataFrame: A DataFrame where each row corresponds to a frame and
        each column corresponds to a landmark coordinate (or the frame index).
    """
    # Imported lazily so this module (and generate_column_names) can be used
    # without OpenCV / MediaPipe installed.
    import cv2
    import mediapipe as mp

    mp_face_mesh = mp.solutions.face_mesh
    mp_hands = mp.solutions.hands

    cap = cv2.VideoCapture(video_path)
    rows = []  # accumulate dicts; building the DataFrame once avoids O(n^2) appends
    try:
        with mp_face_mesh.FaceMesh() as face_mesh, \
                mp_hands.Hands(max_num_hands=2) as hands:
            frame_count = 0
            while cap.isOpened():
                success, frame = cap.read()
                if not success:
                    break
                # MediaPipe expects RGB; OpenCV decodes frames as BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results_face = face_mesh.process(rgb_frame)
                results_hands = hands.process(rgb_frame)

                # Initialize the row with NaNs so undetected landmarks stay missing.
                frame_data = dict.fromkeys(columns, np.nan)
                frame_data['frame'] = frame_count

                # Face landmarks (FaceMesh default is a single face).
                if results_face.multi_face_landmarks:
                    for face_landmarks in results_face.multi_face_landmarks:
                        for i, landmark in enumerate(face_landmarks.landmark):
                            frame_data[f'x_face_{i}'] = landmark.x
                            frame_data[f'y_face_{i}'] = landmark.y

                # Hand landmarks: use MediaPipe's own handedness classification
                # (parallel to multi_hand_landmarks) instead of a fragile
                # wrist-vs-thumb x-coordinate heuristic.
                # NOTE(review): MediaPipe labels handedness assuming a mirrored
                # (selfie-view) image — confirm this matches the input videos.
                if results_hands.multi_hand_landmarks:
                    for hand_landmarks, handedness in zip(
                            results_hands.multi_hand_landmarks,
                            results_hands.multi_handedness):
                        label = handedness.classification[0].label.lower()  # 'left'/'right'
                        hand_type = f'{label}_hand'
                        for i, landmark in enumerate(hand_landmarks.landmark):
                            frame_data[f'x_{hand_type}_{i}'] = landmark.x
                            frame_data[f'y_{hand_type}_{i}'] = landmark.y

                rows.append(frame_data)
                frame_count += 1
    finally:
        # Always release the capture, even if MediaPipe processing raises.
        cap.release()

    return pd.DataFrame(rows, columns=columns)


# Example usage:
# video_path = "videoplayback_with_landmarks.mp4"
# df = video_to_landmarks(video_path, generate_column_names())
#
# # Save the DataFrame to a CSV file
# df.to_csv('landmarks.csv', index=False)