import cv2
import json
import numpy as np
import pandas as pd
import time
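
# Assumed layout of a video landmarks JSON file, inferred from the way it is read
# below: every entry except the last holds the landmarks of one frame, and the
# last entry holds the original video dimensions. The concrete values are
# illustrative only.
#
# [
#   {
#     "hands_landmarks": {"left_hand": {"0": [x, y], ...}, "right_hand": {"0": [x, y], ...}},
#     "pose_landmarks": {"11": [x, y], ...},
#     "face_landmarks": {"1": [x, y], ...}
#   },
#   ...,
#   {"width": 288, "height": 192}
# ]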

def draw_hands_connections(frame, hand_landmarks):
    '''
    Draw white lines between relevant points of the hand landmarks.

    Parameters
    ----------
    frame: numpy array, the frame on which we want to draw
    hand_landmarks: dictionary of hand landmarks, with 'left_hand' and 'right_hand' keys

    Return
    ------
    frame: numpy array, with the hand connections drawn on it
    '''
    # define hand connections between keypoints
    hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
                        [5, 6], [6, 7], [7, 8],
                        [9, 10], [10, 11], [11, 12],
                        [13, 14], [14, 15], [15, 16],
                        [17, 18], [18, 19], [19, 20]]  # [5, 2], [0, 17]]

    # draw the connections of each hand, skipping keypoints missing from the dictionary
    for hand in ('left_hand', 'right_hand'):
        for connection in hand_connections:
            landmark_start = hand_landmarks[hand].get(str(connection[0]))
            landmark_end = hand_landmarks[hand].get(str(connection[1]))
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)

    return frame

def draw_pose_connections(frame, pose_landmarks):
    '''
    Draw white lines between relevant points of the pose landmarks.

    Parameters
    ----------
    frame: numpy array, the frame on which we want to draw
    pose_landmarks: dictionary collecting the pose landmarks

    Return
    ------
    frame: numpy array, with the pose connections drawn on it
    '''
    # define pose connections (shoulders, elbows and wrists)
    pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]

    for connection in pose_connections:
        landmark_start = pose_landmarks.get(str(connection[0]))
        landmark_end = pose_landmarks.get(str(connection[1]))
        if landmark_start is None or landmark_end is None:
            continue
        cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)

    return frame

def draw_face_connections(frame, face_landmarks):
    '''
    Draw white lines between relevant points of the face landmarks.

    Parameters
    ----------
    frame: numpy array, the frame on which we want to draw
    face_landmarks: dictionary collecting the face landmarks

    Return
    ------
    frame: numpy array, with the face connections drawn on it
    '''
    # define face connections (lips, eyes, eyebrows and nose)
    connections_dict = {
        'lipsUpperInner_connections': [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],
        'lipsLowerInner_connections': [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],
        'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],
        'rightEyeLower0': [33, 7, 163, 144, 145, 153, 154, 155, 133],
        'rightEyebrowLower': [35, 124, 46, 53, 52, 65],
        'leftEyeUpper0': [466, 388, 387, 386, 385, 384, 398],
        'leftEyeLower0': [263, 249, 390, 373, 374, 380, 381, 382, 362],
        'leftEyebrowLower': [265, 353, 276, 283, 282, 295],
        'noseTip_midwayBetweenEye': [1, 168],
        'noseTip_noseRightCorner': [1, 98],
        'noseTip_LeftCorner': [1, 327]
    }

    # draw a segment between each pair of consecutive keypoints in every group
    for keypoints_list in connections_dict.values():
        for index in range(len(keypoints_list) - 1):
            landmark_start = face_landmarks.get(str(keypoints_list[index]))
            landmark_end = face_landmarks.get(str(keypoints_list[index + 1]))
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 1)

    return frame

def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
    '''Rescale each [x, y] landmark in place by the given width and height rates.'''
    for keypoint in landmarks.keys():
        landmark_x, landmark_y = landmarks[keypoint]
        landmarks[keypoint] = [int(resize_rate_width * landmark_x), int(resize_rate_height * landmark_y)]
    return landmarks

def generate_video(gloss_list, dataset, vocabulary_list):
    '''
    Generate a landmarks-only video for each gloss in gloss_list and yield its frames
    as multipart JPEG chunks, suitable for an MJPEG HTTP stream.
    '''
    # output frame size: twice the size of the signer 11 videos (288 x 192)
    FIXED_WIDTH, FIXED_HEIGHT = 576, 384
    fps = 25

    for gloss in gloss_list:
        if not check_gloss_in_vocabulary(gloss, vocabulary_list):
            continue
        video_id = select_video_id_from_gloss(gloss, dataset)
        video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
        with open(video_landmarks_path, 'r') as f:
            video_landmarks = json.load(f)
        # the last element of the landmarks file stores the original video dimensions
        width = video_landmarks[-1].get('width')
        height = video_landmarks[-1].get('height')

        # calculate resize rates from the original size to the fixed output size
        resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT / height

        text = gloss
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        font_color = (0, 255, 0)
        thickness = 2
        line_type = cv2.LINE_AA

        for frame_landmarks in video_landmarks[:-1]:  # skip the metadata element at the end
            blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)
            frame_hands_landmarks = frame_landmarks['hands_landmarks']
            frame_pose_landmarks = frame_landmarks['pose_landmarks']
            frame_face_landmarks = frame_landmarks['face_landmarks']

            #left_hand_landmarks_xy = [(x, y) for x, y in frame_hands_landmarks['left_hand'].values()]
            #right_hand_landmarks_xy = [(x, y) for x, y in frame_hands_landmarks['right_hand'].values()]

            #for x, y in left_hand_landmarks_xy:
            #    cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
            #for x, y in right_hand_landmarks_xy:
            # cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)

            # pose_landmarks_xy = [(x, y) for x, y in frame_pose_landmarks.values()]
            # for x, y in pose_landmarks_xy:
            #     cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)

            # face_landmarks_xy = [(x, y) for x, y in frame_face_landmarks.values()]
            # for x, y in face_landmarks_xy:
            #     cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
            frame_hands_landmarks_rs = {
                            'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
                            'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
                                        }
            frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
            frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)
            draw_hands_connections(blank_image, frame_hands_landmarks_rs)
            draw_pose_connections(blank_image, frame_pose_landmarks_rs)
            draw_face_connections(blank_image, frame_face_landmarks_rs)

            text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
            text_x = (FIXED_WIDTH - text_size[0]) // 2
            text_y = FIXED_HEIGHT - 10
            cv2.putText(blank_image, text, (text_x, text_y), font, font_scale, font_color, thickness, line_type)
            
            # encode the frame as JPEG
            _, buffer = cv2.imencode('.jpg', blank_image)
            frame = buffer.tobytes()

            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

            time.sleep(1 / fps)


def load_data(dataset_path='local_dataset'):
    '''Load the dataset CSV and return the DataFrame along with the list of available glosses.'''
    data_df = pd.read_csv(dataset_path, dtype={'video_id': str})
    vocabulary_list = data_df['gloss'].tolist()
    return data_df, vocabulary_list

def check_gloss_in_vocabulary(gloss, vocabulary_list):
    '''Return True if the gloss belongs to the dataset vocabulary.'''
    return gloss in vocabulary_list

def select_video_id_from_gloss(gloss, dataset):
    '''Return the video_id for a gloss, preferring videos recorded by signer 11 when available.'''
    filtered_data_id_11 = dataset.loc[dataset['signer_id'] == 11]
    if gloss in filtered_data_id_11['gloss'].tolist():
        video_id = filtered_data_id_11.loc[filtered_data_id_11['gloss'] == gloss, 'video_id'].values
    else:
        video_id = dataset.loc[dataset['gloss'] == gloss, 'video_id'].values
    return video_id[0]
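

# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module): generate_video() yields
# multipart JPEG chunks in the format expected by an MJPEG HTTP stream (e.g. a
# Flask endpoint returning a 'multipart/x-mixed-replace' response). The demo
# below instead decodes each chunk locally with OpenCV so it can run standalone.
# The dataset path 'local_dataset' and the gloss 'book' are placeholder
# assumptions; adapt them to the actual CSV and vocabulary.
if __name__ == '__main__':
    data_df, vocabulary_list = load_data('local_dataset')
    for chunk in generate_video(['book'], data_df, vocabulary_list):
        # strip the multipart headers and trailer to recover the raw JPEG bytes
        jpeg_bytes = chunk.split(b'\r\n\r\n', 1)[1].rsplit(b'\r\n', 1)[0]
        frame = cv2.imdecode(np.frombuffer(jpeg_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
        cv2.imshow('generated sign video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()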