Spaces:

ginigen
/

Sign-language

Building

Sign-language / src /display_gloss.py

Michael Faivre

Add src folder (templates, htmls, py codes)

c9f9492 12 months ago

8.33 kB

	import cv2
	import json
	import numpy as np
	import pandas as pd
	import os
	import time

	def draw_hands_connections(frame, hand_landmarks):
	    '''
	    Draw white lines between relevant points of hands landmarks

	    Connections for which one of the two endpoints is not available are
	    skipped, so a partially (or not at all) detected hand no longer makes
	    the drawing call fail.

	    Parameters
	    ----------
	    frame: numpy array, corresponding to the frame on which we want to draw
	    hand_landmarks: dictionary, collecting the hands landmarks under the
	        keys 'left_hand' and 'right_hand'; each hand maps a keypoint index
	        (as a string) to the point handed to cv2.line

	    Return
	    ------
	    frame: the same object that was passed in, with the hands drawn on it
	    '''
	    # pairs of keypoint indices to link; the pairs [5, 2] and [0, 17] were
	    # already disabled in the original list and stay disabled here
	    hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
	                        [5, 6], [6, 7], [7, 8],
	                        [9, 10], [10, 11], [11, 12],
	                        [13, 14], [14, 15], [15, 16],
	                        [17, 18], [18, 19], [19, 20]]

	    # left hand first, then right hand, same drawing order as before
	    for side in ('left_hand', 'right_hand'):
	        # a hand that is absent or None is treated as having no keypoints
	        side_landmarks = hand_landmarks.get(side) or {}
	        for start_index, end_index in hand_connections:
	            landmark_start = side_landmarks.get(str(start_index))
	            landmark_end = side_landmarks.get(str(end_index))
	            # .get() yields None for a missing keypoint, which cv2.line rejects
	            if landmark_start is None or landmark_end is None:
	                continue
	            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)

	    return frame

	def draw_pose_connections(frame, pose_landmarks):
	    '''
	    Draw white lines between relevant points of pose landmarks

	    Connections for which one of the two endpoints is not available are
	    skipped instead of being passed to the drawing call.

	    Parameters
	    ----------
	    frame: numpy array, corresponding to the frame on which we want to draw
	    pose_landmarks: dictionary, collecting the pose landmarks; keys are
	        keypoint indices as strings, values are the points handed to cv2.line

	    Return
	    ------
	    frame: the same object that was passed in, with the pose drawn on it
	    '''
	    # pairs of pose keypoint indices to link
	    pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]

	    for start_index, end_index in pose_connections:
	        landmark_start = pose_landmarks.get(str(start_index))
	        landmark_end = pose_landmarks.get(str(end_index))
	        # .get() yields None for a missing keypoint, which cv2.line rejects
	        if landmark_start is None or landmark_end is None:
	            continue
	        cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)

	    return frame

	def draw_face_connections(frame, face_landmarks):
	    '''
	    Draw white lines between relevant points of face landmarks

	    Each entry of the connection table is a chain of keypoints: every
	    keypoint is linked to the next one of its chain. Links for which one of
	    the two endpoints is not available are skipped.

	    Parameters
	    ----------
	    frame: numpy array, corresponding to the frame on which we want to draw
	    face_landmarks: dictionary, collecting the face landmarks; keys are
	        keypoint indices as strings, values are the points handed to cv2.line

	    Return
	    ------
	    frame: the same object that was passed in, with the face drawn on it
	    '''
	    # define face connections: one chain of keypoint indices per facial feature
	    connections_dict = {
	        'lipsUpperInner_connections': [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],
	        'lipsLowerInner_connections': [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],
	        'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],
	        'rightEyeLower0': [33, 7, 163, 144, 145, 153, 154, 155, 133],
	        'rightEyebrowLower': [35, 124, 46, 53, 52, 65],
	        'leftEyeUpper0': [466, 388, 387, 386, 385, 384, 398],
	        'leftEyeLower0': [263, 249, 390, 373, 374, 380, 381, 382, 362],
	        'leftEyebrowLower': [265, 353, 276, 283, 282, 295],
	        'noseTip_midwayBetweenEye': [1, 168],
	        'noseTip_noseRightCorner': [1, 98],
	        'noseTip_LeftCorner': [1, 327],
	    }

	    for keypoints_list in connections_dict.values():
	        # pair every keypoint with its successor in the chain
	        for start_index, end_index in zip(keypoints_list, keypoints_list[1:]):
	            landmark_start = face_landmarks.get(str(start_index))
	            landmark_end = face_landmarks.get(str(end_index))
	            # .get() yields None for a missing keypoint, which cv2.line rejects
	            if landmark_start is None or landmark_end is None:
	                continue
	            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 1)
	    return frame

	def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
	    '''
	    Rescale every landmark of a dictionary, in place

	    Parameters
	    ----------
	    landmarks: dictionary, mapping each keypoint to an (x, y) pair
	    resize_rate_width: number, factor applied to the x coordinate
	    resize_rate_height: number, factor applied to the y coordinate

	    Return
	    ------
	    landmarks: the same dictionary object, whose values are now
	        [x, y] lists of the scaled coordinates truncated to int
	    '''
	    # only existing keys are reassigned, so the dictionary size never
	    # changes while it is being iterated
	    for name, (x, y) in landmarks.items():
	        scaled_x = int(resize_rate_width * x)
	        scaled_y = int(resize_rate_height * y)
	        landmarks[name] = [scaled_x, scaled_y]
	    return landmarks

	def generate_video(gloss_list, dataset, vocabulary_list):
	    '''
	    Generate, frame by frame, a skeleton video for a sequence of glosses

	    For every gloss found in the vocabulary, the landmarks file of one of
	    its videos is loaded, each frame is drawn (hands, pose, face and the
	    gloss text) on a black canvas, encoded as JPEG and yielded. Glosses
	    that are not in the vocabulary are skipped.

	    Parameters
	    ----------
	    gloss_list: iterable of glosses to display, in order
	    dataset: pandas DataFrame, with at least the columns 'video_id' and
	        'video_landmarks_path' (plus those used to pick a video)
	    vocabulary_list: collection of the known glosses

	    Yield
	    -----
	    bytes: one chunk per frame, made of a '--frame' boundary line, a JPEG
	        content-type header and the encoded image
	        (presumably consumed as a multipart HTTP stream - confirm against
	        the caller)
	    '''
	    # Output canvas size. The original note gives 288x192 as the size of
	    # signer 11's videos; the canvas used here is twice that.
	    FIXED_WIDTH, FIXED_HEIGHT = 576, 384
	    fps = 8  # target frame rate of the stream

	    for gloss in gloss_list:
	        if not check_gloss_in_vocabulary(gloss, vocabulary_list):
	            continue
	        video_id = select_video_id_from_gloss(gloss, dataset)
	        video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
	        with open(video_landmarks_path, 'r') as f:
	            video_landmarks = json.load(f)

	        # the last entry of the file holds the size of the source video,
	        # every entry before it describes one frame
	        # NOTE(review): if 'width' or 'height' is missing there, the
	        # division below raises - confirm the files always provide them
	        width = video_landmarks[-1].get('width')
	        height = video_landmarks[-1].get('height')

	        # calculate resize rate
	        resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT / height

	        text = gloss
	        font = cv2.FONT_HERSHEY_SIMPLEX
	        font_scale = 1
	        font_color = (0, 255, 0)
	        thickness = 2
	        line_type = cv2.LINE_AA

	        # the caption is the same on every frame of a gloss, so its
	        # position (horizontally centred, 10 px above the bottom edge)
	        # is computed once per gloss rather than once per frame
	        text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
	        text_x = (FIXED_WIDTH - text_size[0]) // 2
	        text_y = FIXED_HEIGHT - 10

	        for frame_landmarks in video_landmarks[:-1]:
	            blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)
	            frame_hands_landmarks = frame_landmarks['hands_landmarks']
	            frame_pose_landmarks = frame_landmarks['pose_landmarks']
	            frame_face_landmarks = frame_landmarks['face_landmarks']

	            # bring the landmarks from the source video size to the canvas size
	            frame_hands_landmarks_rs = {
	                'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
	                'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
	            }
	            frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
	            frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)
	            draw_hands_connections(blank_image, frame_hands_landmarks_rs)
	            draw_pose_connections(blank_image, frame_pose_landmarks_rs)
	            draw_face_connections(blank_image, frame_face_landmarks_rs)

	            cv2.putText(blank_image, text, (text_x, text_y), font, font_scale, font_color, thickness, line_type)

	            # convert the image to an encoded JPEG; a frame that could not
	            # be encoded is dropped rather than sent as a broken chunk
	            encoded_ok, buffer = cv2.imencode('.jpg', blank_image)
	            if not encoded_ok:
	                continue
	            frame = buffer.tobytes()

	            yield (b'--frame\r\n'
	                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

	            # pace the stream at the target frame rate
	            time.sleep(1 / fps)


	def load_data(dataset_path='local_dataset'):
	    '''
	    Read the dataset CSV file and list the glosses it contains

	    Parameters
	    ----------
	    dataset_path: path of the CSV file to read

	    Return
	    ------
	    data_df: pandas DataFrame read from the file, 'video_id' kept as str
	    vocabulary_list: list, values of the 'gloss' column in row order
	    '''
	    csv_path = os.path.join(dataset_path)
	    # video ids are read as strings so they are not converted to numbers
	    dataset = pd.read_csv(csv_path, dtype={'video_id': str})
	    return dataset, dataset['gloss'].tolist()

	def check_gloss_in_vocabulary(gloss, vocabulary_list):
	    '''
	    Tell whether a gloss belongs to the vocabulary

	    Parameters
	    ----------
	    gloss: the gloss to look for
	    vocabulary_list: collection of the known glosses

	    Return
	    ------
	    bool: True when gloss is found in vocabulary_list, False otherwise
	    '''
	    is_known = gloss in vocabulary_list
	    return is_known

	def select_video_id_from_gloss(gloss, dataset, preferred_signer_id=11):
	    '''
	    Pick the id of a video showing the given gloss

	    A video recorded by the preferred signer is returned when there is
	    one; otherwise the first video of the gloss, whoever the signer is.

	    Parameters
	    ----------
	    gloss: the gloss to look for
	    dataset: pandas DataFrame, with the columns 'gloss', 'signer_id'
	        and 'video_id'
	    preferred_signer_id: value of 'signer_id' to favour (11 by default,
	        the signer the function was previously hard-wired to)

	    Return
	    ------
	    video_id: value of 'video_id' on the first matching row

	    Raise
	    -----
	    IndexError: when no row of the dataset carries this gloss
	    '''
	    gloss_rows = dataset.loc[dataset['gloss'] == gloss]
	    if gloss_rows.empty:
	        # same exception type as indexing an empty result, clearer message
	        raise IndexError(f"no video found for gloss {gloss!r}")

	    preferred_rows = gloss_rows.loc[gloss_rows['signer_id'] == preferred_signer_id]
	    chosen_rows = gloss_rows if preferred_rows.empty else preferred_rows
	    return chosen_rows['video_id'].values[0]