Spaces:
Sleeping
Sleeping
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
def generate_column_names(num_face_landmarks=468, num_hand_landmarks=21):
    """
    Generate column names for a DataFrame that will store coordinates of landmarks.

    Column names are formatted as '{coordinate}_{landmark_type}_{landmark_index}'.
    The list starts with 'frame', followed by every face 'x' column, every face
    'y' column, and then, for 'left_hand' and 'right_hand' in turn, that hand's
    'x' columns followed by its 'y' columns.

    Parameters:
    num_face_landmarks (int): Number of face landmarks per frame. Defaults to
        468, the size of the MediaPipe face mesh. Pass a different count if the
        model in use emits a different number of points.
    num_hand_landmarks (int): Number of landmarks per hand. Defaults to 21, the
        size of the MediaPipe hand model.

    Returns:
    list: A list of strings representing the column names.
    """
    coordinates = ('x', 'y')
    columns = ['frame']

    # Face columns: all x coordinates first, then all y coordinates.
    columns.extend(
        f'{coordinate}_face_{i}'
        for coordinate in coordinates
        for i in range(num_face_landmarks)
    )

    # Hand columns: left hand (x then y), then right hand (x then y).
    for hand in ('left_hand', 'right_hand'):
        columns.extend(
            f'{coordinate}_{hand}_{i}'
            for coordinate in coordinates
            for i in range(num_hand_landmarks)
        )

    return columns
def video_to_landmarks(video_path, columns):
    """
    Extract face and hand landmarks from a video and store them in a DataFrame.

    The video is processed frame by frame. For each frame, face and hand landmarks
    are detected using MediaPipe's face mesh and hand models, respectively.
    The x and y coordinates of the landmarks are stored in a DataFrame; landmarks
    that were not detected in a frame are left as NaN.

    Parameters:
    video_path (str): Path to the video file.
    columns (list): List of column names for the DataFrame.

    Returns:
    pd.DataFrame: A DataFrame where each row corresponds to a frame and each column
        corresponds to a landmark coordinate. If no frame could be read, an empty
        DataFrame with the given columns is returned.
    """
    mp_face_mesh = mp.solutions.face_mesh
    mp_hands = mp.solutions.hands
    wrist = mp_hands.HandLandmark.WRIST
    thumb_tip = mp_hands.HandLandmark.THUMB_TIP

    # Rows are accumulated in a list and turned into a DataFrame once at the
    # end; appending to a DataFrame per frame copies the whole frame each time.
    rows = []

    cap = cv2.VideoCapture(video_path)
    try:
        with mp_face_mesh.FaceMesh() as face_mesh, mp_hands.Hands(max_num_hands=2) as hands:
            frame_count = 0
            while cap.isOpened():
                success, frame = cap.read()
                if not success:
                    break

                # The frame is converted from BGR to RGB before being handed
                # to the MediaPipe models.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results_face = face_mesh.process(rgb_frame)
                results_hands = hands.process(rgb_frame)

                # Start every frame with NaN in all columns so that landmarks
                # missing from this frame stay NaN.
                frame_data = dict.fromkeys(columns, np.nan)
                frame_data['frame'] = frame_count

                # Process face landmarks
                for face_landmarks in results_face.multi_face_landmarks or ():
                    for i, landmark in enumerate(face_landmarks.landmark):
                        frame_data[f'x_face_{i}'] = landmark.x
                        frame_data[f'y_face_{i}'] = landmark.y

                # Process hand landmarks
                # NOTE(review): the hand label is inferred from whether the
                # wrist lies left of the thumb tip in the image. If both
                # detected hands receive the same label, the later one
                # overwrites the earlier one. TODO: confirm whether MediaPipe's
                # own handedness output should be used instead.
                for hand_landmarks in results_hands.multi_hand_landmarks or ():
                    points = hand_landmarks.landmark
                    if points[wrist].x < points[thumb_tip].x:
                        hand_type = 'left_hand'
                    else:
                        hand_type = 'right_hand'
                    for i, landmark in enumerate(points):
                        frame_data[f'x_{hand_type}_{i}'] = landmark.x
                        frame_data[f'y_{hand_type}_{i}'] = landmark.y

                rows.append(frame_data)
                frame_count += 1
    finally:
        # Release the capture even if detection raises part-way through.
        cap.release()

    if not rows:
        return pd.DataFrame(columns=columns)
    # Each row dict was seeded with `columns` in order, so the resulting column
    # order is `columns` followed by any landmark keys not listed there.
    return pd.DataFrame(rows)
# Example usage:
# video_path = "videoplayback_with_landmarks.mp4"
# df = video_to_landmarks(video_path, generate_column_names())
#
# # Save the DataFrame to a CSV file
# df.to_csv('landmarks.csv', index=False)