import cv2
import json
import numpy as np
import pandas as pd
import time
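
# Assumed layout of a video landmarks JSON file, inferred from the way it is read
# below: every entry except the last holds the landmarks of one frame, and the
# last entry holds the original video dimensions. The concrete values are
# illustrative only.
#
# [
#   {
#     "hands_landmarks": {"left_hand": {"0": [x, y], ...}, "right_hand": {"0": [x, y], ...}},
#     "pose_landmarks": {"11": [x, y], ...},
#     "face_landmarks": {"1": [x, y], ...}
#   },
#   ...,
#   {"width": 288, "height": 192}
# ]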

def draw_hands_connections(frame, hand_landmarks):
    '''
    Draw white lines between relevant points of the hand landmarks.

    Parameters
    ----------
    frame: numpy array, the frame on which we want to draw
    hand_landmarks: dictionary of hand landmarks, with 'left_hand' and 'right_hand' keys

    Return
    ------
    frame: numpy array, with the hand connections drawn on it
    '''
    # define hand connections between keypoints
    hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
                        [5, 6], [6, 7], [7, 8],
                        [9, 10], [10, 11], [11, 12],
                        [13, 14], [14, 15], [15, 16],
                        [17, 18], [18, 19], [19, 20]]  # [5, 2], [0, 17]]

    # draw the connections of each hand, skipping keypoints missing from the dictionary
    for hand in ('left_hand', 'right_hand'):
        for connection in hand_connections:
            landmark_start = hand_landmarks[hand].get(str(connection[0]))
            landmark_end = hand_landmarks[hand].get(str(connection[1]))
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)

    return frame

def draw_pose_connections(frame, pose_landmarks):
    '''
    Draw white lines between relevant points of the pose landmarks.

    Parameters
    ----------
    frame: numpy array, the frame on which we want to draw
    pose_landmarks: dictionary collecting the pose landmarks

    Return
    ------
    frame: numpy array, with the pose connections drawn on it
    '''
    # define pose connections (shoulders, elbows and wrists)
    pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]

    for connection in pose_connections:
        landmark_start = pose_landmarks.get(str(connection[0]))
        landmark_end = pose_landmarks.get(str(connection[1]))
        if landmark_start is None or landmark_end is None:
            continue
        cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)

    return frame

def draw_face_connections(frame, face_landmarks):
    '''
    Draw white lines between relevant points of the face landmarks.

    Parameters
    ----------
    frame: numpy array, the frame on which we want to draw
    face_landmarks: dictionary collecting the face landmarks

    Return
    ------
    frame: numpy array, with the face connections drawn on it
    '''
    # define face connections (lips, eyes, eyebrows and nose)
    connections_dict = {
        'lipsUpperInner_connections': [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],
        'lipsLowerInner_connections': [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],
        'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],
        'rightEyeLower0': [33, 7, 163, 144, 145, 153, 154, 155, 133],
        'rightEyebrowLower': [35, 124, 46, 53, 52, 65],
        'leftEyeUpper0': [466, 388, 387, 386, 385, 384, 398],
        'leftEyeLower0': [263, 249, 390, 373, 374, 380, 381, 382, 362],
        'leftEyebrowLower': [265, 353, 276, 283, 282, 295],
        'noseTip_midwayBetweenEye': [1, 168],
        'noseTip_noseRightCorner': [1, 98],
        'noseTip_LeftCorner': [1, 327]
    }

    # draw a segment between each pair of consecutive keypoints in every group
    for keypoints_list in connections_dict.values():
        for index in range(len(keypoints_list) - 1):
            landmark_start = face_landmarks.get(str(keypoints_list[index]))
            landmark_end = face_landmarks.get(str(keypoints_list[index + 1]))
            if landmark_start is None or landmark_end is None:
                continue
            cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 1)

    return frame

def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
    '''Rescale each [x, y] landmark in place by the given width and height rates.'''
    for keypoint in landmarks.keys():
        landmark_x, landmark_y = landmarks[keypoint]
        landmarks[keypoint] = [int(resize_rate_width * landmark_x), int(resize_rate_height * landmark_y)]
    return landmarks

def generate_video(gloss_list, dataset, vocabulary_list):
    '''
    Generate a landmarks-only video for each gloss in gloss_list and yield its frames
    as multipart JPEG chunks, suitable for an MJPEG HTTP stream.
    '''
    # output frame size: twice the size of the signer 11 videos (288 x 192)
    FIXED_WIDTH, FIXED_HEIGHT = 576, 384
    fps = 25

    for gloss in gloss_list:
        if not check_gloss_in_vocabulary(gloss, vocabulary_list):
            continue
        video_id = select_video_id_from_gloss(gloss, dataset)
        video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
        with open(video_landmarks_path, 'r') as f:
            video_landmarks = json.load(f)
        # the last element of the landmarks file stores the original video dimensions
        width = video_landmarks[-1].get('width')
        height = video_landmarks[-1].get('height')

        # calculate resize rates from the original size to the fixed output size
        resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT / height

        text = gloss
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        font_color = (0, 255, 0)
        thickness = 2
        line_type = cv2.LINE_AA

        for frame_landmarks in video_landmarks[:-1]:  # skip the metadata element at the end
            blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)
            frame_hands_landmarks = frame_landmarks['hands_landmarks']
            frame_pose_landmarks = frame_landmarks['pose_landmarks']
            frame_face_landmarks = frame_landmarks['face_landmarks']

            #left_hand_landmarks_xy = [(x, y) for x, y in frame_hands_landmarks['left_hand'].values()]
            #right_hand_landmarks_xy = [(x, y) for x, y in frame_hands_landmarks['right_hand'].values()]

            #for x, y in left_hand_landmarks_xy:
            #    cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
            #for x, y in right_hand_landmarks_xy:
            # cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)

            # pose_landmarks_xy = [(x, y) for x, y in frame_pose_landmarks.values()]
            # for x, y in pose_landmarks_xy:
            #     cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)

            # face_landmarks_xy = [(x, y) for x, y in frame_face_landmarks.values()]
            # for x, y in face_landmarks_xy:
            #     cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
            frame_hands_landmarks_rs = {
                            'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
                            'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
                                        }
            frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
            frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)
            draw_hands_connections(blank_image, frame_hands_landmarks_rs)
            draw_pose_connections(blank_image, frame_pose_landmarks_rs)
            draw_face_connections(blank_image, frame_face_landmarks_rs)

            text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
            text_x = (FIXED_WIDTH - text_size[0]) // 2
            text_y = FIXED_HEIGHT - 10
            cv2.putText(blank_image, text, (text_x, text_y), font, font_scale, font_color, thickness, line_type)
            
            # encode the frame as JPEG
            _, buffer = cv2.imencode('.jpg', blank_image)
            frame = buffer.tobytes()

            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

            time.sleep(1 / fps)


def load_data(dataset_path='local_dataset'):
    '''Load the dataset CSV and return the DataFrame along with the list of available glosses.'''
    data_df = pd.read_csv(dataset_path, dtype={'video_id': str})
    vocabulary_list = data_df['gloss'].tolist()
    return data_df, vocabulary_list

def check_gloss_in_vocabulary(gloss, vocabulary_list):
    '''Return True if the gloss belongs to the dataset vocabulary.'''
    return gloss in vocabulary_list

def select_video_id_from_gloss(gloss, dataset):
    '''Return the video_id for a gloss, preferring videos recorded by signer 11 when available.'''
    filtered_data_id_11 = dataset.loc[dataset['signer_id'] == 11]
    if gloss in filtered_data_id_11['gloss'].tolist():
        video_id = filtered_data_id_11.loc[filtered_data_id_11['gloss'] == gloss, 'video_id'].values
    else:
        video_id = dataset.loc[dataset['gloss'] == gloss, 'video_id'].values
    return video_id[0]
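

# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module): generate_video() yields
# multipart JPEG chunks in the format expected by an MJPEG HTTP stream (e.g. a
# Flask endpoint returning a 'multipart/x-mixed-replace' response). The demo
# below instead decodes each chunk locally with OpenCV so it can run standalone.
# The dataset path 'local_dataset' and the gloss 'book' are placeholder
# assumptions; adapt them to the actual CSV and vocabulary.
if __name__ == '__main__':
    data_df, vocabulary_list = load_data('local_dataset')
    for chunk in generate_video(['book'], data_df, vocabulary_list):
        # strip the multipart headers and trailer to recover the raw JPEG bytes
        jpeg_bytes = chunk.split(b'\r\n\r\n', 1)[1].rsplit(b'\r\n', 1)[0]
        frame = cv2.imdecode(np.frombuffer(jpeg_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
        cv2.imshow('generated sign video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()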