# Sign-language / src / display_gloss.py
# Author: Michael Faivre
# Renders sign-language glosses as landmark-skeleton video frames streamed as MJPEG.
import cv2
import json
import numpy as np
import pandas as pd
import os
import time
def draw_hands_connections(frame, hand_landmarks):
    '''
    Draw white lines between relevant keypoints of both hands.

    Parameters
    ----------
    frame: numpy array, corresponding to the frame on which we want to draw
    hand_landmarks: dictionary with 'left_hand' and 'right_hand' keys, each
        mapping stringified keypoint indices to [x, y] pixel coordinates

    Return
    ------
    frame: numpy array, with the newly drawn hand skeletons (drawn in place)
    '''
    # Keypoint index pairs forming each finger chain (thumb, index, middle, ring, pinky).
    hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
                        [5, 6], [6, 7], [7, 8],
                        [9, 10], [10, 11], [11, 12],
                        [13, 14], [14, 15], [15, 16],
                        [17, 18], [18, 19], [19, 20]]  # [5, 2], [0, 17]]
    # Draw the same connection set for each hand. The original version duplicated
    # this loop for 'left_hand' and 'right_hand'; merged here. Pairs with a missing
    # keypoint are skipped so a partially-detected hand does not pass None to cv2.line.
    for hand_key in ('left_hand', 'right_hand'):
        landmarks = hand_landmarks[hand_key]
        for start_idx, end_idx in hand_connections:
            landmark_start = landmarks.get(str(start_idx))
            landmark_end = landmarks.get(str(end_idx))
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)
    return frame
def draw_pose_connections(frame, pose_landmarks):
    '''
    Draw white lines between relevant upper-body pose keypoints.

    Parameters
    ----------
    frame: numpy array, corresponding to the frame on which we want to draw
    pose_landmarks: dictionary mapping stringified keypoint indices to [x, y]
        pixel coordinates

    Return
    ------
    frame: numpy array, with the newly drawn pose skeleton (drawn in place)
    '''
    # Shoulder line plus both arms (shoulder -> elbow -> wrist).
    pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]
    for start_idx, end_idx in pose_connections:
        landmark_start = pose_landmarks.get(str(start_idx))
        landmark_end = pose_landmarks.get(str(end_idx))
        # Skip connections whose endpoints were not detected in this frame,
        # so cv2.line is never called with a None endpoint.
        if landmark_start is None or landmark_end is None:
            continue
        cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)
    return frame
def draw_face_connections(frame, face_landmarks):
    '''
    Draw white polylines along relevant face contours (lips, eyes, eyebrows, nose).

    Parameters
    ----------
    frame: numpy array, corresponding to the frame on which we want to draw
    face_landmarks: dictionary mapping stringified keypoint indices to [x, y]
        pixel coordinates

    Return
    ------
    frame: numpy array, with the newly drawn face contours (drawn in place)
    '''
    # Each entry lists the keypoint indices of one facial contour, in drawing order.
    connections_dict = {'lipsUpperInner_connections' : [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],
                        'lipsLowerInner_connections' : [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],
                        'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],
                        'rightEyeLower0' : [33, 7, 163, 144, 145, 153, 154, 155, 133],
                        'rightEyebrowLower' : [35, 124, 46, 53, 52, 65],
                        'leftEyeUpper0' : [466, 388, 387, 386, 385, 384, 398],
                        'leftEyeLower0' : [263, 249, 390, 373, 374, 380, 381, 382, 362],
                        'leftEyebrowLower' : [265, 353, 276, 283, 282, 295],
                        'noseTip_midwayBetweenEye' : [1, 168],
                        'noseTip_noseRightCorner' : [1, 98],
                        'noseTip_LeftCorner' : [1, 327]
                        }
    for keypoints_list in connections_dict.values():
        # Connect each consecutive pair of keypoints along the contour.
        for start_idx, end_idx in zip(keypoints_list, keypoints_list[1:]):
            landmark_start = face_landmarks.get(str(start_idx))
            landmark_end = face_landmarks.get(str(end_idx))
            # Skip segments whose endpoints were not detected in this frame.
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 1)
    return frame
def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
    '''
    Scale landmark coordinates in place by the given width/height rates.

    Parameters
    ----------
    landmarks: dictionary mapping keypoint names to [x, y] coordinates;
        mutated in place
    resize_rate_width: float, horizontal scaling factor
    resize_rate_height: float, vertical scaling factor

    Return
    ------
    landmarks: the same dictionary, with each coordinate scaled and
        truncated to int
    '''
    # Iterate with .items() to unpack each coordinate pair directly; replacing
    # values of existing keys while iterating is safe (no size change).
    for keypoint, (landmark_x, landmark_y) in landmarks.items():
        landmarks[keypoint] = [int(resize_rate_width * landmark_x),
                               int(resize_rate_height * landmark_y)]
    return landmarks
def generate_video(gloss_list, dataset, vocabulary_list):
    '''
    Stream rendered landmark videos for each known gloss as MJPEG frame chunks.

    Parameters
    ----------
    gloss_list: iterable of str, glosses to render in order
    dataset: pandas DataFrame with at least 'gloss', 'video_id' and
        'video_landmarks_path' columns
    vocabulary_list: list of str, glosses available in the dataset

    Yields
    ------
    bytes: one multipart/x-mixed-replace chunk containing a JPEG-encoded frame
        (suitable for an HTTP MJPEG stream)
    '''
    # Output canvas size; twice the native 288x192 of signer 11's videos.
    FIXED_WIDTH, FIXED_HEIGHT = 576, 384
    fps = 8  # throttles the stream to roughly this frame rate
    for gloss in gloss_list:
        # Silently skip glosses not present in the vocabulary instead of failing.
        if not check_gloss_in_vocabulary(gloss, vocabulary_list):
            continue
        video_id = select_video_id_from_gloss(gloss, dataset)
        video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
        with open(video_landmarks_path, 'r') as f:
            video_landmarks = json.load(f)
        # The last JSON entry stores the source video dimensions, not landmarks.
        width = video_landmarks[-1].get('width')
        height = video_landmarks[-1].get('height')
        # Scaling factors from source-video coordinates to the fixed canvas.
        resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT / height
        # Caption settings for the gloss label drawn at the bottom of each frame.
        text = gloss
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        font_color = (0, 255, 0)
        thickness = 2
        line_type = cv2.LINE_AA
        for frame_landmarks in video_landmarks[:-1]:
            blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)
            frame_hands_landmarks = frame_landmarks['hands_landmarks']
            frame_pose_landmarks = frame_landmarks['pose_landmarks']
            frame_face_landmarks = frame_landmarks['face_landmarks']
            # Rescale every landmark group to canvas coordinates before drawing.
            frame_hands_landmarks_rs = {
                'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
                'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
            }
            frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
            frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)
            draw_hands_connections(blank_image, frame_hands_landmarks_rs)
            draw_pose_connections(blank_image, frame_pose_landmarks_rs)
            draw_face_connections(blank_image, frame_face_landmarks_rs)
            # Center the gloss caption horizontally, near the bottom edge.
            text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
            text_x = (FIXED_WIDTH - text_size[0]) // 2
            text_y = FIXED_HEIGHT - 10
            cv2.putText(blank_image, text, (text_x, text_y), font, font_scale, font_color, thickness, line_type)
            # Encode the frame as JPEG and yield one multipart chunk for the stream.
            _, buffer = cv2.imencode('.jpg', blank_image)
            frame = buffer.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
            time.sleep(1 / fps)
def load_data(dataset_path='local_dataset'):
    '''
    Load the gloss dataset CSV and extract its vocabulary.

    Parameters
    ----------
    dataset_path: str, path to the dataset CSV file

    Return
    ------
    data_df: pandas DataFrame; 'video_id' is read as str to preserve any
        leading zeros
    vocabulary_list: list, every value of the 'gloss' column
    '''
    # Note: the original wrapped dataset_path in a single-argument
    # os.path.join, which is a no-op; the path is used directly here.
    data_df = pd.read_csv(dataset_path, dtype={'video_id': str})
    vocabulary_list = data_df['gloss'].tolist()
    return data_df, vocabulary_list
def check_gloss_in_vocabulary(gloss, vocabulary_list):
    '''Return True when the given gloss appears in the vocabulary list.'''
    is_known = gloss in vocabulary_list
    return is_known
def select_video_id_from_gloss(gloss, dataset):
    '''
    Pick a video id for a gloss, preferring videos performed by signer 11.

    Parameters
    ----------
    gloss: str, the gloss to look up
    dataset: pandas DataFrame with 'signer_id', 'gloss' and 'video_id' columns

    Return
    ------
    The first matching 'video_id' value, taken from signer 11's videos when
    that signer performed the gloss, otherwise from the whole dataset.
    '''
    signer_11_rows = dataset.loc[dataset['signer_id'] == 11]
    # Prefer signer 11's rendition when available; fall back to any signer.
    if gloss in signer_11_rows['gloss'].tolist():
        candidates = signer_11_rows
    else:
        candidates = dataset
    matching_ids = candidates.loc[candidates['gloss'] == gloss, 'video_id'].values
    return matching_ids[0]