Spaces:

ArielDrabkin
/

Spell-Net

Runtime error

Spell-Net / src /video_to_landmark_coordinates.py

Ariel

Update files

c6cec04 over 1 year ago

3.56 kB

	import cv2
	import mediapipe as mp
	import pandas as pd
	import numpy as np


	def generate_column_names():
	    """
	    Build the ordered list of DataFrame column names for landmark coordinates.

	    The list starts with 'frame', followed by the face columns (all x
	    coordinates, then all y coordinates), then for each hand (left, then
	    right) its x columns followed by its y columns. Every coordinate column
	    is named '{coordinate}_{landmark_type}_{landmark_index}'.

	    Returns:
	    list: The column names as strings, in the order described above.
	    """
	    axes = ('x', 'y')

	    # 468 face-mesh landmarks per axis.
	    face_names = [f'{axis}_face_{idx}' for axis in axes for idx in range(468)]

	    # 21 hand landmarks per axis, for each hand.
	    hand_names = [
	        f'{axis}_{side}_{idx}'
	        for side in ('left_hand', 'right_hand')
	        for axis in axes
	        for idx in range(21)
	    ]

	    return ['frame', *face_names, *hand_names]


	def video_to_landmarks(video_path, columns):
	    """
	    Extract face and hand landmarks from a video and store them in a DataFrame.

	    The video is read frame by frame. Each frame is converted from BGR to RGB
	    and passed to MediaPipe's FaceMesh and Hands solutions. For every frame a
	    row is created with NaN in each column of `columns`, the frame index in
	    'frame', and the x/y coordinates of every detected landmark.

	    Parameters:
	    video_path (str): Path to the video file.
	    columns (list): List of column names for the DataFrame.

	    Returns:
	    pd.DataFrame: A DataFrame where each row corresponds to a frame and each
	    column corresponds to a landmark coordinate. If no frame could be read,
	    an empty DataFrame with the given columns is returned.
	    """
	    mp_face_mesh = mp.solutions.face_mesh
	    mp_hands = mp.solutions.hands
	    wrist = mp_hands.HandLandmark.WRIST
	    thumb_tip = mp_hands.HandLandmark.THUMB_TIP

	    # Rows are collected in a plain list and turned into a DataFrame once at
	    # the end; appending to a DataFrame per frame copies the whole frame each
	    # time and relied on the private `DataFrame._append`.
	    rows = []

	    cap = cv2.VideoCapture(video_path)
	    try:
	        with mp_face_mesh.FaceMesh() as face_mesh, mp_hands.Hands(max_num_hands=2) as hands:
	            frame_count = 0
	            while cap.isOpened():
	                success, frame = cap.read()
	                if not success:
	                    break

	                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	                results_face = face_mesh.process(rgb_frame)
	                results_hands = hands.process(rgb_frame)

	                # Start from NaN everywhere so undetected landmarks stay missing.
	                # (`np.nan`, not `np.NaN`: the latter alias was removed in NumPy 2.0.)
	                frame_data = dict.fromkeys(columns, np.nan)
	                frame_data['frame'] = frame_count

	                # Face landmarks. If several faces are returned, the last one wins
	                # because they all write to the same keys.
	                if results_face.multi_face_landmarks:
	                    for face_landmarks in results_face.multi_face_landmarks:
	                        for i, landmark in enumerate(face_landmarks.landmark):
	                            frame_data[f'x_face_{i}'] = landmark.x
	                            frame_data[f'y_face_{i}'] = landmark.y

	                # Hand landmarks.
	                if results_hands.multi_hand_landmarks:
	                    for hand_landmarks in results_hands.multi_hand_landmarks:
	                        # NOTE(review): the hand is labelled by comparing the wrist and
	                        # thumb-tip x positions, not via `results_hands.multi_handedness`.
	                        # If both detected hands get the same label, the later one
	                        # overwrites the earlier. Kept as is so the output stays
	                        # compatible with existing consumers - confirm this is intended.
	                        points = hand_landmarks.landmark
	                        if points[wrist].x < points[thumb_tip].x:
	                            hand_type = 'left_hand'
	                        else:
	                            hand_type = 'right_hand'

	                        for i, landmark in enumerate(points):
	                            frame_data[f'x_{hand_type}_{i}'] = landmark.x
	                            frame_data[f'y_{hand_type}_{i}'] = landmark.y

	                rows.append(frame_data)
	                frame_count += 1
	    finally:
	        # Release the capture even if MediaPipe or OpenCV raises mid-video.
	        cap.release()

	    if not rows:
	        return pd.DataFrame(columns=columns)

	    # Built without `columns=` so that any landmark key missing from `columns`
	    # is kept as an extra column instead of being dropped.
	    # NOTE(review): column dtypes are now inferred from the collected rows in one
	    # pass; confirm downstream code does not depend on the dtypes the per-row
	    # append used to produce (e.g. for the 'frame' column).
	    return pd.DataFrame(rows)

	# video_path = "videoplayback_with_landmarks.mp4"
	# df = video_to_landmarks(video_path, generate_column_names())
	#
	# # Save the DataFrame to a CSV file
	# df.to_csv('landmarks.csv', index=False)