Spaces:
Building
Building
File size: 8,292 Bytes
c9f9492 73c6cca c9f9492 73c6cca c9f9492 2e870f3 c9f9492 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import cv2
import json
import numpy as np
import pandas as pd
import os
import time
def draw_hands_connections(frame, hand_landmarks):
    '''
    Draw white lines between relevant points of hands landmarks
    Parameters
    ----------
    frame: numpy array, corresponding to the frame on which we want to draw
    hand_landmarks: dictionary with the keys 'left_hand' and 'right_hand',
        each mapping a keypoint index (as a string) to its pixel coordinates
    Return
    ------
    frame: numpy array, the same frame with the hands drawn on it
    '''
    # Pairs of keypoint indices joined by a segment: five chains of
    # consecutive indices (presumably one per finger - TODO confirm against
    # the landmark model). The palm links [5, 2] and [0, 17] are disabled.
    hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
                        [5, 6], [6, 7], [7, 8],
                        [9, 10], [10, 11], [11, 12],
                        [13, 14], [14, 15], [15, 16],
                        [17, 18], [18, 19], [19, 20]]

    for hand in ('left_hand', 'right_hand'):
        # A hand that is absent (missing key or None) is simply not drawn.
        landmarks = hand_landmarks.get(hand) or {}
        for start_index, end_index in hand_connections:
            landmark_start = landmarks.get(str(start_index))
            landmark_end = landmarks.get(str(end_index))
            # cv2.line cannot take None as a point: skip any segment whose
            # endpoints are not both available.
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)
    return frame
def draw_pose_connections(frame, pose_landmarks):
    '''
    Draw white lines between relevant points of pose landmarks
    Parameters
    ----------
    frame: numpy array, corresponding to the frame on which we want to draw
    pose_landmarks: dictionary mapping a keypoint index (as a string) to its
        pixel coordinates
    Return
    ------
    frame: numpy array, the same frame with the pose drawn on it
    '''
    # Pairs of keypoint indices joined by a segment (presumably shoulders,
    # elbows and wrists - TODO confirm against the landmark model).
    pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]

    for start_index, end_index in pose_connections:
        landmark_start = pose_landmarks.get(str(start_index))
        landmark_end = pose_landmarks.get(str(end_index))
        # cv2.line cannot take None as a point: skip any segment whose
        # endpoints are not both available.
        if landmark_start is None or landmark_end is None:
            continue
        cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)
    return frame
def draw_face_connections(frame, face_landmarks):
    '''
    Draw white lines between relevant points of face landmarks
    Parameters
    ----------
    frame: numpy array, corresponding to the frame on which we want to draw
    face_landmarks: dictionary mapping a keypoint index (as a string) to its
        pixel coordinates
    Return
    ------
    frame: numpy array, the same frame with the face drawn on it
    '''
    # Each entry is an ordered list of keypoint indices; consecutive indices
    # are joined by a segment. Only the lists are used, the names are labels.
    connections_dict = {
        'lipsUpperInner_connections': [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],
        'lipsLowerInner_connections': [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],
        'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],
        'rightEyeLower0': [33, 7, 163, 144, 145, 153, 154, 155, 133],
        'rightEyebrowLower': [35, 124, 46, 53, 52, 65],
        'leftEyeUpper0': [466, 388, 387, 386, 385, 384, 398],
        'leftEyeLower0': [263, 249, 390, 373, 374, 380, 381, 382, 362],
        'leftEyebrowLower': [265, 353, 276, 283, 282, 295],
        'noseTip_midwayBetweenEye': [1, 168],
        'noseTip_noseRightCorner': [1, 98],
        'noseTip_LeftCorner': [1, 327],
    }

    for keypoints_list in connections_dict.values():
        # Walk the list as (current, next) pairs.
        for start_index, end_index in zip(keypoints_list, keypoints_list[1:]):
            landmark_start = face_landmarks.get(str(start_index))
            landmark_end = face_landmarks.get(str(end_index))
            # cv2.line cannot take None as a point: skip any segment whose
            # endpoints are not both available.
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 1)
    return frame
def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
    '''
    Rescale the coordinates of a set of landmarks, in place
    Parameters
    ----------
    landmarks: dictionary mapping a keypoint to its (x, y) coordinates
    resize_rate_width: factor applied to every x coordinate
    resize_rate_height: factor applied to every y coordinate
    Return
    ------
    landmarks: the same dictionary, each value replaced by the list
        [int(x * width rate), int(y * height rate)]
    '''
    # Only existing keys are reassigned, so iterating over items() is safe.
    for keypoint, (x, y) in landmarks.items():
        scaled_x = int(resize_rate_width * x)
        scaled_y = int(resize_rate_height * y)
        landmarks[keypoint] = [scaled_x, scaled_y]
    return landmarks
def generate_video(gloss_list, dataset, vocabulary_list):
    '''
    Generate, gloss after gloss, the JPEG frames of a skeleton animation
    Parameters
    ----------
    gloss_list: iterable of glosses to render, in order; glosses that are
        not in vocabulary_list are skipped
    dataset: pandas dataframe with (at least) the columns 'video_id' and
        'video_landmarks_path', plus those read by select_video_id_from_gloss
    vocabulary_list: collection of the known glosses
    Yield
    -----
    bytes: one chunk per frame, made of a '--frame' boundary line, a
        'Content-Type: image/jpeg' header and the JPEG encoded image
        (presumably consumed as a multipart HTTP stream - confirm with the
        caller). The generator sleeps 1 / fps seconds after each frame.
    '''
    # Fixed output canvas size; an earlier 288 x 192 setting is disabled.
    # NOTE(review): the original comment ties these dimensions to the videos
    # of signer 11 - confirm which of the two sizes it refers to.
    FIXED_WIDTH, FIXED_HEIGHT = 576, 384
    fps = 25
    frame_delay = 1 / fps

    for gloss in gloss_list:
        if not check_gloss_in_vocabulary(gloss, vocabulary_list):
            continue

        video_id = select_video_id_from_gloss(gloss, dataset)
        video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
        with open(video_landmarks_path, 'r', encoding='utf-8') as f:
            video_landmarks = json.load(f)

        # The last entry of the file carries the size of the source video,
        # every entry before it describes one frame.
        width = video_landmarks[-1].get('width')
        height = video_landmarks[-1].get('height')

        # calculate resize rate
        resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT / height

        # Caption settings: the gloss is written in green, horizontally
        # centred, 10 pixels above the bottom edge. Its position only
        # depends on the gloss, so it is computed once per video.
        text = gloss
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        font_color = (0, 255, 0)
        thickness = 2
        line_type = cv2.LINE_AA
        text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
        text_x = (FIXED_WIDTH - text_size[0]) // 2
        text_y = FIXED_HEIGHT - 10

        for frame_landmarks in video_landmarks[:-1]:
            blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)

            frame_hands_landmarks = frame_landmarks['hands_landmarks']
            frame_pose_landmarks = frame_landmarks['pose_landmarks']
            frame_face_landmarks = frame_landmarks['face_landmarks']

            # Bring every landmark from the source video size to the canvas size.
            frame_hands_landmarks_rs = {
                'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
                'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
            }
            frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
            frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)

            draw_hands_connections(blank_image, frame_hands_landmarks_rs)
            draw_pose_connections(blank_image, frame_pose_landmarks_rs)
            draw_face_connections(blank_image, frame_face_landmarks_rs)

            cv2.putText(blank_image, text, (text_x, text_y), font, font_scale, font_color, thickness, line_type)

            # Convert the image to an encoded JPEG; a frame that could not
            # be encoded is dropped instead of streaming an invalid buffer.
            encoded, buffer = cv2.imencode('.jpg', blank_image)
            if not encoded:
                continue
            frame = buffer.tobytes()

            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
            # Pace the stream at roughly `fps` frames per second.
            time.sleep(frame_delay)
def load_data(dataset_path='local_dataset'):
    '''
    Load the dataset description and the vocabulary it covers
    Parameters
    ----------
    dataset_path: path of the csv file to read
    Return
    ------
    data_df: pandas dataframe, content of the csv file, with the 'video_id'
        column read as strings
    vocabulary_list: list, the values of the 'gloss' column, one per row
    '''
    data_df = pd.read_csv(dataset_path, dtype={'video_id': str})
    return data_df, data_df['gloss'].tolist()
def check_gloss_in_vocabulary(gloss, vocabulary_list):
    '''
    Tell whether a gloss belongs to the vocabulary
    Parameters
    ----------
    gloss: the gloss to look for
    vocabulary_list: collection of the known glosses
    Return
    ------
    bool: True when gloss is an element of vocabulary_list, False otherwise
    '''
    is_known = gloss in vocabulary_list
    return is_known
def select_video_id_from_gloss(gloss, dataset):
    '''
    Pick the video used to render a gloss, preferring signer 11
    Parameters
    ----------
    gloss: the gloss to look for
    dataset: pandas dataframe with the columns 'signer_id', 'gloss' and
        'video_id'
    Return
    ------
    video_id: the 'video_id' of the first row of signer 11 matching the
        gloss or, when signer 11 has none, of the first matching row of
        any signer
    Raise
    -----
    IndexError: when no row of the dataset matches the gloss
    '''
    signer_rows = dataset.loc[dataset['signer_id'] == 11]
    signer_matches = signer_rows.loc[signer_rows['gloss'] == gloss, 'video_id'].values
    if len(signer_matches) > 0:
        return signer_matches[0]

    # No recording by signer 11: fall back to any signer.
    any_signer_matches = dataset.loc[dataset['gloss'] == gloss, 'video_id'].values
    return any_signer_matches[0]