diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1de644888645617c06b87456f29b7d9ea45c4284 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +assets/cam_demo1.gif filter=lfs diff=lfs merge=lfs -text +assets/cam_demo2.gif filter=lfs diff=lfs merge=lfs -text +assets/demo10.gif filter=lfs diff=lfs merge=lfs -text +assets/demo20.gif filter=lfs diff=lfs merge=lfs -text +data/source/demo.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/HifiFaceAPI_parallel_base.py b/HifiFaceAPI_parallel_base.py new file mode 100644 index 0000000000000000000000000000000000000000..5473554f6d7e92e0ffb09f35570a9842a0c1435a --- /dev/null +++ b/HifiFaceAPI_parallel_base.py @@ -0,0 +1,148 @@ +import os +import time +import numpy as np + +import numexpr as ne +# ne.set_num_threads(10) + +from multiprocessing.dummy import Process, Queue +from face_detect.face_align_68 import face_alignment_landmark +from face_detect.face_detect import FaceDetect +from face_lib.face_swap import HifiFace +from face_restore.gfpgan_onnx_api import GFPGAN +from face_restore.xseg_onnx_api import XSEG + +TRACKING_THRESHOLD = 0.15 + +# def np_norm(x): +# return (x - np.average(x)) / np.std(x) + +def cosine_vectorized_v3(array1, array2): + sumyy = np.einsum('ij,ij->i', array2, array2) + sumxx = np.einsum('ij,ij->i', array1, array1)[:, None] + sumxy = array1.dot(array2.T) + sqrt_sumxx = ne.evaluate('sqrt(sumxx)') + sqrt_sumyy = ne.evaluate('sqrt(sumyy)') + return ne.evaluate('(sumxy/sqrt_sumxx)/sqrt_sumyy') + + +class Consumer0Base(Process): + def __init__(self, opt, frame_queue_in, feature_dst_list=None, queue_list=None, block=True, fps_counter=False): + super().__init__() + self.queue_list = queue_list + self.fps_counter = fps_counter + self.block = block + self.pid = os.getpid() + + self.opt = opt + self.frame_queue_in = frame_queue_in + self.feature_dst_list = feature_dst_list + self.crop_size = self.opt.input_size + self.scrfd_detector = FaceDetect(mode='scrfd_500m', tracking_thres=TRACKING_THRESHOLD) + self.face_alignment = face_alignment_landmark(lm_type=68) + + print('init consumer {}, pid is {}.'.format(self.__class__.__name__, self.pid)) + + +class Consumer1BaseONNX(Process): + def __init__(self, opt, feature_list, queue_list: list, block=True, fps_counter=False,provider='gpu', load_xseg=True, xseg_flag=False): + super().__init__() + self.queue_list = queue_list + self.fps_counter = fps_counter + self.block = block + self.pid = os.getpid() + self.opt = opt + self.feature_list = feature_list + # self.index_list = index_list + # self.apply_gpen = apply_gpen + self.crop_size = self.opt.input_size + self.xseg_flag = xseg_flag + + print("model_name:", self.opt.model_name) + self.hf = HifiFace(model_name='er8_bs1', provider=provider) + if load_xseg: + self.xseg = XSEG(model_type='xseg_0611', provider=provider) + + def switch_xseg(self): + self.xseg_flag = not self.xseg_flag + + def predict(self, src_face_image, dst_face_latent): + mask_out, swap_face_out = self.hf.forward(src_face_image, dst_face_latent) + if self.xseg_flag: + mask_out = self.xseg.forward(swap_face_out)[None,None] + return [mask_out, swap_face_out] + + +class Consumer2Base(Process): + def __init__(self, queue_list: list, frame_queue_out, block=True, fps_counter=False): + super().__init__() + self.queue_list = queue_list + 
self.fps_counter = fps_counter + self.block = block + self.pid = os.getpid() + self.frame_queue_out = frame_queue_out + + # from face_restore import FaceRestore + # self.fa = FaceRestore(use_gpu=True, mode='gfpgan') # gfpgan gpen dfdnet + + print('init consumer {}, pid is {}.'.format(self.__class__.__name__, self.pid)) + + def run(self): + counter = 0 + start_time = time.time() + + while True: + something_in = self.queue_list[0].get() + + # exit condition + if something_in is None: + print('subprocess {} exit !'.format(self.pid)) + break + + self.forward_func(something_in) + + if self.fps_counter: + counter += 1 + if (time.time() - start_time) > 4: + print("Consumer2 FPS: {}".format(counter / (time.time() - start_time))) + counter = 0 + start_time = time.time() + print('c2 stop') + # cv2.destroyAllWindows() + +class Consumer3Base(Process): + def __init__(self, queue_list, block=True, fps_counter=False, provider='gpu'): + super().__init__() + self.queue_list = queue_list + self.fps_counter = fps_counter + self.block = block + self.pid = os.getpid() + + self.gfp = GFPGAN(model_type='GFPGANv1.4', provider=provider) + + print('init consumer {}, pid is {}.'.format(self.__class__.__name__, self.pid)) + + def run(self): + counter = 0 + start_time = time.time() + + while True: + something_in = self.queue_list[0].get() + + if something_in is None: + print('subprocess {} exit !'.format(self.pid)) + self.queue_list[1].put(None) + break + + self.forward_func(something_in) + + + if self.fps_counter: + counter += 1 + if (time.time() - start_time) > 4: + print("Consumer3 FPS: {}".format(counter / (time.time() - start_time))) + counter = 0 + start_time = time.time() + + print('c3 stop') + diff --git a/HifiFaceAPI_parallel_trt_roi_realtime_api.py b/HifiFaceAPI_parallel_trt_roi_realtime_api.py new file mode 100644 index 0000000000000000000000000000000000000000..29f9273d6aad7bd1b5f414602e33ab87b1f684f3 --- /dev/null +++ b/HifiFaceAPI_parallel_trt_roi_realtime_api.py @@ -0,0 +1,189 @@ +import os +import cv2 +import time +import numpy as np +import numexpr as ne +from multiprocessing.dummy import Process, Queue +from options.hifi_test_options import HifiTestOptions +from HifiFaceAPI_parallel_base import Consumer0Base, Consumer2Base, Consumer1BaseONNX + + +def np_norm(x): + return (x - np.average(x)) / np.std(x) + + +def reverse2wholeimage_hifi_trt_roi(swaped_img, mat_rev, img_mask, frame, roi_img, roi_box): + target_image = cv2.warpAffine(swaped_img, mat_rev, roi_img.shape[:2][::-1], borderMode=cv2.BORDER_REPLICATE)[ + ..., + ::-1] + + local_dict = { + 'img_mask': img_mask, + 'target_image': target_image, + 'roi_img': roi_img, + } + img = ne.evaluate('img_mask * (target_image * 255)+(1 - img_mask) * roi_img', local_dict=local_dict, + global_dict=None) + img = img.astype(np.uint8) + frame[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]] = img + return frame + + +def get_max_face(np_rois): + roi_areas = [] + for index in range(np_rois.shape[0]): + roi_areas.append((np_rois[index, 2] - np_rois[index, 0]) * (np_rois[index, 3] - np_rois[index, 1])) + return np.argmax(np.array(roi_areas)) + + +class Consumer0(Consumer0Base): + def __init__(self, opt, frame_queue_in, queue_list: list, block=True, fps_counter=False): + super().__init__(opt, frame_queue_in, None, queue_list, block, fps_counter) + + def run(self): + counter = 0 + start_time = time.time() + kpss_old = None + rois_old = faces_old = Ms_old = masks_old = None + + while True: + frame = self.frame_queue_in.get() + if frame is None: + break + try: + _, bboxes, 
kpss = self.scrfd_detector.get_bboxes(frame, max_num=0) + rois, faces, Ms, masks = self.face_alignment.forward( + frame, bboxes, kpss, limit=5, min_face_size=30, + crop_size=(self.crop_size, self.crop_size), apply_roi=True + ) + + except (TypeError, IndexError, ValueError) as e: + self.queue_list[0].put([None, frame]) + continue + + if len(faces)==0: + self.queue_list[0].put([None, frame]) + continue + elif len(faces)==1: + face = np.array(faces[0]) + mat = Ms[0] + roi_box = rois[0] + else: + max_index = get_max_face(np.array(rois)) + face = np.array(faces[max_index]) + mat = Ms[max_index] + roi_box = rois[max_index] + roi_img = frame[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]] + + # "The default normalization to the range of -1 to 1, where the model input is in RGB format + face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB) + + self.queue_list[0].put([face, mat, [], frame, roi_img, roi_box]) + + if self.fps_counter: + counter += 1 + if (time.time() - start_time) > 10: + print("Consumer0 FPS: {}".format(counter / (time.time() - start_time))) + counter = 0 + start_time = time.time() + self.queue_list[0].put(None) + print('co stop') + + +class Consumer1(Consumer1BaseONNX): + def __init__(self, opt, feature_list, queue_list: list, block=True, fps_counter=False): + super().__init__(opt, feature_list, queue_list, block, fps_counter) + + def run(self): + counter = 0 + start_time = time.time() + + while True: + something_in = self.queue_list[0].get() + if something_in is None: + break + elif len(something_in) == 2: + self.queue_list[1].put([None, something_in[1]]) + continue + + + if len(self.feature_list) > 1: + self.feature_list.pop(0) + + image_latent = self.feature_list[0][0] + + mask_out, swap_face_out = self.predict(something_in[0], image_latent[0].reshape(1, -1)) + + mask = cv2.warpAffine(mask_out[0][0].astype(np.float32), something_in[1], + something_in[4].shape[:2][::-1]) + mask[mask > 0.2] = 1 + mask = mask[:, :, np.newaxis].astype(np.uint8) + swap_face = swap_face_out[0].transpose((1, 2, 0)).astype(np.float32) + + self.queue_list[1].put( + [swap_face, something_in[1], mask, something_in[3], something_in[4], something_in[5]]) + + if self.fps_counter: + counter += 1 + if (time.time() - start_time) > 10: + print("Consumer1 FPS: {}".format(counter / (time.time() - start_time))) + counter = 0 + start_time = time.time() + self.queue_list[1].put(None) + print('c1 stop') + + +class Consumer2(Consumer2Base): + def __init__(self, queue_list: list, frame_queue_out, block=True, fps_counter=False): + super().__init__(queue_list, frame_queue_out, block, fps_counter) + self.face_detect_flag = True + + def forward_func(self, something_in): + + # do your work here. 
+ if len(something_in) == 2: + self.face_detect_flag = False + frame = something_in[1] + frame_out = frame.astype(np.uint8) + else: + self.face_detect_flag = True + # swap_face = something_in[0] + swap_face = ((something_in[0] + 1) / 2) + frame_out = reverse2wholeimage_hifi_trt_roi( + swap_face, something_in[1], something_in[2], + something_in[3], something_in[4], something_in[5] + ) + self.frame_queue_out.put([frame_out, self.face_detect_flag]) + # cv2.imshow('output', frame_out) + # cv2.waitKey(1) + + +class HifiFaceRealTime: + + def __init__(self, feature_dict_list_, frame_queue_in, frame_queue_out, gpu=True, model_name=''): + self.opt = HifiTestOptions().parse() + if model_name != '': + self.opt.model_name = model_name + self.opt.input_size = 256 + self.feature_dict_list = feature_dict_list_ + self.frame_queue_in = frame_queue_in + self.frame_queue_out = frame_queue_out + + self.gpu = gpu + + def forward(self): + self.q0 = Queue(2) + self.q1 = Queue(2) + + self.c0 = Consumer0(self.opt, self.frame_queue_in, [self.q0], fps_counter=False) + self.c1 = Consumer1(self.opt, self.feature_dict_list, [self.q0, self.q1], fps_counter=False) + self.c2 = Consumer2([self.q1], self.frame_queue_out, fps_counter=False) + + self.c0.start() + self.c1.start() + self.c2.start() + + self.c0.join() + self.c1.join() + self.c2.join() + return diff --git a/HifiFaceAPI_parallel_trt_roi_realtime_sr_api.py b/HifiFaceAPI_parallel_trt_roi_realtime_sr_api.py new file mode 100644 index 0000000000000000000000000000000000000000..86ae805f8bbe8534cdfb6dfef19a94dd3c355b17 --- /dev/null +++ b/HifiFaceAPI_parallel_trt_roi_realtime_sr_api.py @@ -0,0 +1,234 @@ +import os +import cv2 +import time +import numpy as np +import numexpr as ne +from multiprocessing.dummy import Process, Queue +from options.hifi_test_options import HifiTestOptions +from HifiFaceAPI_parallel_base import Consumer0Base, Consumer2Base, Consumer3Base,Consumer1BaseONNX +from color_transfer import color_transfer + + +def np_norm(x): + return (x - np.average(x)) / np.std(x) + + +def reverse2wholeimage_hifi_trt_roi(swaped_img, mat_rev, img_mask, frame, roi_img, roi_box): + target_image = cv2.warpAffine(swaped_img, mat_rev, roi_img.shape[:2][::-1], borderMode=cv2.BORDER_REPLICATE)[ + ..., + ::-1] + local_dict = { + 'img_mask': img_mask, + 'target_image': target_image, + 'roi_img': roi_img, + } + img = ne.evaluate('img_mask * (target_image * 255)+(1 - img_mask) * roi_img', local_dict=local_dict, + global_dict=None) + img = img.astype(np.uint8) + frame[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]] = img + return frame + + +def get_max_face(np_rois): + roi_areas = [] + for index in range(np_rois.shape[0]): + roi_areas.append((np_rois[index, 2] - np_rois[index, 0]) * (np_rois[index, 3] - np_rois[index, 1])) + return np.argmax(np.array(roi_areas)) + +class Consumer0(Consumer0Base): + def __init__(self, opt, frame_queue_in, queue_list: list, block=True, fps_counter=False, align_method='68'): + super().__init__(opt, frame_queue_in, None, queue_list, block, fps_counter) + self.align_method = align_method + + def run(self): + counter = 0 + start_time = time.time() + kpss_old = None + rois_old = faces_old = Ms_old = masks_old = None + + while True: + frame = self.frame_queue_in.get() + if frame is None: + break + try: + _, bboxes, kpss = self.scrfd_detector.get_bboxes(frame, max_num=0) + if self.align_method == '5class': + rois, faces, Ms, masks = self.mtcnn_detector.align_multi_for_scrfd( + frame, bboxes, kpss, limit=1, min_face_size=30, + crop_size=(self.crop_size, 
self.crop_size), apply_roi=True, detector=None + ) + else: + rois, faces, Ms, masks = self.face_alignment.forward( + frame, bboxes, kpss, limit=5, min_face_size=30, + crop_size=(self.crop_size, self.crop_size), apply_roi=True + ) + + except (TypeError, IndexError, ValueError) as e: + self.queue_list[0].put([None, frame]) + continue + + if len(faces)==0: + self.queue_list[0].put([None, frame]) + continue + elif len(faces)==1: + face = np.array(faces[0]) + mat = Ms[0] + roi_box = rois[0] + else: + max_index = get_max_face(np.array(rois)) + face = np.array(faces[max_index]) + mat = Ms[max_index] + roi_box = rois[max_index] + roi_img = frame[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]] + + #The default normalization to the range of -1 to 1, where the model input is in RGB format + face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB) + + self.queue_list[0].put([face, mat, [], frame, roi_img, roi_box]) + + if self.fps_counter: + counter += 1 + if (time.time() - start_time) > 10: + print("Consumer0 FPS: {}".format(counter / (time.time() - start_time))) + counter = 0 + start_time = time.time() + self.queue_list[0].put(None) + print('co stop') + + +class Consumer1(Consumer1BaseONNX): + def __init__(self, opt, feature_list, queue_list: list, block=True, fps_counter=False): + super().__init__(opt, feature_list, queue_list, block, fps_counter) + + def run(self): + counter = 0 + start_time = time.time() + + while True: + something_in = self.queue_list[0].get() + if something_in is None: + break + elif len(something_in) == 2: + self.queue_list[1].put([None, something_in[1]]) + continue + + if len(self.feature_list) > 1: + self.feature_list.pop(0) + + image_latent = self.feature_list[0][0] + + mask_out, swap_face_out = self.predict(something_in[0], image_latent[0].reshape(1, -1)) + + mask = cv2.warpAffine(mask_out[0][0].astype(np.float32), something_in[1], + something_in[4].shape[:2][::-1]) + mask[mask > 0.2] = 1 + mask = mask[:, :, np.newaxis].astype(np.uint8) + swap_face = swap_face_out[0].transpose((1, 2, 0)).astype(np.float32) + + self.queue_list[1].put( + [swap_face, something_in[1], mask, something_in[3], something_in[4], something_in[5], something_in[0]]) + + if self.fps_counter: + counter += 1 + if (time.time() - start_time) > 10: + print("Consumer1 FPS: {}".format(counter / (time.time() - start_time))) + counter = 0 + start_time = time.time() + self.queue_list[1].put(None) + print('c1 stop') + + +class Consumer2(Consumer2Base): + def __init__(self, queue_list: list, frame_queue_out, block=True, fps_counter=False): + super().__init__(queue_list, frame_queue_out, block, fps_counter) + + def forward_func(self, something_in): + if len(something_in) == 2: + frame = something_in[1] + frame_out = frame.astype(np.uint8) + else: + swap_face = ((something_in[0] + 1) / 2) + frame_out = reverse2wholeimage_hifi_trt_roi( + swap_face, something_in[1], something_in[2], + something_in[3], something_in[4], something_in[5] + ) + self.frame_queue_out.put(frame_out) + # cv2.imshow('output', frame_out) + # cv2.waitKey(1) + +class Consumer3(Consumer3Base): + def __init__(self, queue_list, block=True, fps_counter=False, use_gfpgan=True, sr_weight=1.0, + use_color_trans=False, color_trans_mode=''): + super().__init__(queue_list, block, fps_counter) + self.use_gfpgan = use_gfpgan + self.sr_weight = sr_weight + self.use_color_trans = use_color_trans + self.color_trans_mode = color_trans_mode + + def forward_func(self, something_in): + if len(something_in) == 2: + self.queue_list[1].put([None, something_in[1]]) + else: + 
swap_face = something_in[0] + target_face = (something_in[6] / 255).astype(np.float32) + if self.use_gfpgan: + sr_face = self.gfp.forward(swap_face) + if self.sr_weight != 1.0: + sr_face = cv2.addWeighted(sr_face, alpha=self.sr_weight, src2=swap_face, beta=1.0 - self.sr_weight, gamma=0, dtype=cv2.CV_32F) + if self.use_color_trans: + transed_face = color_transfer(self.color_trans_mode, (sr_face + 1) / 2, target_face) + result_face = (transed_face * 2) - 1 + else: + result_face = sr_face + else: + if self.use_color_trans: + transed_face = color_transfer(self.color_trans_mode, (swap_face + 1) / 2, target_face) + result_face = (transed_face * 2) - 1 + else: + result_face = swap_face + self.queue_list[1].put([result_face, something_in[1], something_in[2], something_in[3], + something_in[4], something_in[5]]) + + +class HifiFaceRealTime: + + def __init__(self, feature_dict_list_, frame_queue_in, frame_queue_out, gpu=True, model_name='er8_bs1', align_method='68', + use_gfpgan=True, sr_weight=1.0, use_color_trans=False, color_trans_mode='rct'): + self.opt = HifiTestOptions().parse() + if model_name != '': + self.opt.model_name = model_name + self.opt.input_size = 256 + self.feature_dict_list = feature_dict_list_ + self.frame_queue_in = frame_queue_in + self.frame_queue_out = frame_queue_out + + self.gpu = gpu + self.align_method = align_method + self.use_gfpgan = use_gfpgan + self.sr_weight = sr_weight + self.use_color_trans = use_color_trans + self.color_trans_mode = color_trans_mode + + + def forward(self): + self.q0 = Queue(2) + self.q1 = Queue(2) + self.q2 = Queue(2) + + self.c0 = Consumer0(self.opt, self.frame_queue_in, [self.q0], fps_counter=False, align_method=self.align_method) + self.c1 = Consumer1(self.opt, self.feature_dict_list, [self.q0, self.q1], fps_counter=False) + self.c3 = Consumer3([self.q1, self.q2], fps_counter=False, + use_gfpgan=self.use_gfpgan, sr_weight=self.sr_weight, + use_color_trans=self.use_color_trans, color_trans_mode=self.color_trans_mode) + self.c2 = Consumer2([self.q2], self.frame_queue_out, fps_counter=False) + + self.c0.start() + self.c1.start() + self.c3.start() + self.c2.start() + + self.c0.join() + self.c1.join() + self.c3.join() + self.c2.join() + return diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..6160f5b6c383c29f58015455d28af27b1313ba52 --- /dev/null +++ b/LICENSE @@ -0,0 +1,32 @@ +Silicon Intelligence COMMUNITY LICENSE AGREEMENT + +“Agreement” means the terms and conditions for use, reproduction, distribution and modification of this product forth herein. + +“Documentation” means the specifications, manuals and documentation by Silicon Intelligence. + +“Licensee” or “you” means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf. + +“Silicon Intelligence Materials” means, collectively, Silicon Intelligence’s proprietary code and Documentation (and any portion thereof) made available under this Agreement. + +By clicking “I Accept” below or by using or distributing any portion or element of the Silicon Intelligence Materials, you agree to be bound by this Agreement. + +1. License Rights and Redistribution. + + a. Grant of Rights. 
You are granted a non-exclusive, worldwide, non-transferable and royalty-free limited license under Silicon Intelligence’s intellectual property or other rights owned by Silicon Intelligence embodied in the Silicon Intelligence Materials to use, reproduce, distribute, copy, create derivative works of, and make modifications to the Silicon Intelligence Materials. + b. Redistribution and Use. + i. If you distribute or make available the Silicon Intelligence Materials (or any derivative works thereof), or a product or service that uses any of them, you shall (A) provide a copy of this Agreement with any such Silicon Intelligence Materials; and (B) prominently display “Built with Silicon Intelligence” on a related website, user interface, blogpost, about page, or product documentation. If you use the Silicon Intelligence Materials to create, train, fine-tune, or otherwise improve an AI model, which is distributed or made available, you shall also include “Silicon Intelligence” at the beginning of any such AI model name. + ii. If you receive Silicon Intelligence Materials, or any derivative works thereof, from a Licensee as part of an integrated end user product, then Section 2 of this Agreement will not apply to you. + iii. You must retain in all copies of the Silicon Intelligence Materials that you distribute the following attribution notice within a “Notice” text file distributed as a part of such copies: “Silicon Intelligence is licensed under the Silicon Intelligence Community License, Copyright © Silicon Intelligence Platforms, Inc. All Rights Reserved.” + iv. Your use of the Silicon Intelligence Materials must comply with applicable laws and regulations (including trade compliance laws and regulations). + +2. Additional Commercial Terms. If, on the Silicon Intelligence duix.ai version release date, the monthly active users of the products or services made available by or for Licensee, or Licensee’s affiliates, are greater than 1 thousand in the preceding calendar month, or if a product of yours based on the Silicon Intelligence Materials has greater than 1 thousand active users, you must request a license from Silicon Intelligence, which Silicon Intelligence may grant to you in its sole discretion, and you are not authorized to exercise any of the rights under this Agreement unless or until Silicon Intelligence otherwise expressly grants you such rights. + +3. Disclaimer of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE SILICON INTELLIGENCE MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, AND SILICON INTELLIGENCE DISCLAIMS ALL WARRANTIES OF ANY KIND, BOTH EXPRESS AND IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE SILICON INTELLIGENCE MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE SILICON INTELLIGENCE MATERIALS AND ANY OUTPUT AND RESULTS. + +4. Limitation of Liability. IN NO EVENT WILL SILICON INTELLIGENCE OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF SILICON INTELLIGENCE OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING. + +5. Intellectual Property. + a.
No trademark licenses are granted under this Agreement, and in connection with the Silicon Intelligence Materials, neither Silicon Intelligence nor Licensee may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Silicon Intelligence Materials or as set forth in this Section 5(a). Silicon Intelligence hereby grants you a license to use “Silicon Intelligence” solely as required to comply with the last sentence of Section 1.b.i. You will comply with Silicon Intelligence’s brand guidelines. All goodwill arising out of your use of the Mark will inure to the benefit of Silicon Intelligence. + b. If you institute litigation or other proceedings against Silicon Intelligence or any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Silicon Intelligence Materials or outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Silicon Intelligence from and against any claim by any third party arising out of or related to your use or distribution of the Silicon Intelligence Materials. + +6. Term and Termination. The term of this Agreement will commence upon your acceptance of this Agreement or access to the Silicon Intelligence Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Silicon Intelligence may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Silicon Intelligence Materials. Sections 3 and 4 shall survive the termination of this Agreement.
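The two realtime API files added above share one entry point: build HifiFaceRealTime with a source-face feature list and a pair of frame queues, then call forward(), which starts the Consumer0 → Consumer1 (→ Consumer3) → Consumer2 chain and blocks until a None frame is fed in. The driver below is a minimal sketch and is not part of this diff: run_cam_demo, the camera index, window name, and exit key are illustrative; it assumes HifiImage.get_face_feature accepts a PIL image (as in app.py) and returns a 2-D latent so that Consumer1's feature_list[0][0][0].reshape(1, -1) indexing works, and it uses the sr variant whose Consumer2 puts plain frames on the output queue.

import threading
import cv2
from PIL import Image
from multiprocessing.dummy import Queue

from face_feature.hifi_image_api import HifiImage
from HifiFaceAPI_parallel_trt_roi_realtime_sr_api import HifiFaceRealTime


def run_cam_demo():  # hypothetical driver, not part of the diff
    # Source identity: Consumer1 reads feature_list[0][0] and reshapes its first
    # row to (1, -1), so the latent is assumed to be a 2-D array.
    hi = HifiImage(crop_size=256)
    src_latent, _ = hi.get_face_feature(Image.open('data/source/elon-musk1.jpg'))
    feature_list = [[src_latent]]

    frame_in, frame_out = Queue(2), Queue(2)
    hfrt = HifiFaceRealTime(feature_list, frame_in, frame_out,
                            use_gfpgan=True, sr_weight=1.0)
    # forward() joins the consumer threads, so run it in a worker thread.
    threading.Thread(target=hfrt.forward, daemon=True).start()

    cap = cv2.VideoCapture(0)  # camera index is illustrative
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            frame_in.put(frame)
            swapped = frame_out.get()  # the sr pipeline emits one output frame per input frame
            cv2.imshow('hififace', swapped)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        frame_in.put(None)  # None propagates through the queues and stops the consumers
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    run_cam_demo()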
diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..4bf77556802d76441ede81d71e2f00743525b22a --- /dev/null +++ b/app.py @@ -0,0 +1,175 @@ +import gradio as gr +import cv2 +import os +import numpy as np +import numexpr as ne +from concurrent.futures import ThreadPoolExecutor + +from face_feature.hifi_image_api import HifiImage +from HifiFaceAPI_parallel_trt_roi_realtime_sr_api import HifiFaceRealTime +from face_lib.face_swap import HifiFace +from face_restore.gfpgan_onnx_api import GFPGAN +from face_restore.xseg_onnx_api import XSEG +from face_detect.face_align_68 import face_alignment_landmark +from face_detect.face_detect import FaceDetect +from options.hifi_test_options import HifiTestOptions +from color_transfer import color_transfer + +opt = HifiTestOptions().parse() +processor = None + +def initialize_processor(): + global processor + if processor is None: + processor = FaceSwapProcessor(crop_size=opt.input_size) + +class FaceSwapProcessor: + def __init__(self, crop_size=256): + self.hi = HifiImage(crop_size=crop_size) + self.xseg = XSEG(model_type='xseg_0611', provider='gpu') + self.hf = HifiFace(model_name='er8_bs1', provider='gpu') + self.scrfd_detector = FaceDetect(mode='scrfd_500m', tracking_thres=0.15) + self.face_alignment = face_alignment_landmark(lm_type=68) + self.gfp = GFPGAN(model_type='GFPGANv1.4', provider='gpu') + self.crop_size = crop_size + + def reverse2wholeimage_hifi_trt_roi(self, swaped_img, mat_rev, img_mask, frame, roi_img, roi_box): + target_image = cv2.warpAffine(swaped_img, mat_rev, roi_img.shape[:2][::-1], borderMode=cv2.BORDER_REPLICATE)[ + ..., + ::-1] + local_dict = { + 'img_mask': img_mask, + 'target_image': target_image, + 'roi_img': roi_img, + } + img = ne.evaluate('img_mask * (target_image * 255)+(1 - img_mask) * roi_img', local_dict=local_dict, + global_dict=None) + img = img.astype(np.uint8) + frame[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]] = img + return frame + + def process_frame(self, frame, image_latent, use_gfpgan, sr_weight, use_color_trans, color_trans_mode): + _, bboxes, kpss = self.scrfd_detector.get_bboxes(frame, max_num=0) + rois, faces, Ms, masks = self.face_alignment.forward( + frame, bboxes, kpss, limit=5, min_face_size=30, + crop_size=(self.crop_size, self.crop_size), apply_roi=True + ) + + if len(faces) == 0: + return frame + elif len(faces) == 1: + face = np.array(faces[0]) + mat = Ms[0] + roi_box = rois[0] + else: + max_index = np.argmax([roi[2] * roi[3] for roi in rois]) # Get the largest face + face = np.array(faces[max_index]) + mat = Ms[max_index] + roi_box = rois[max_index] + + roi_img = frame[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]] + face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB) + + mask_out, swap_face_out = self.hf.forward(face, image_latent[0].reshape(1, -1)) + mask_out = self.xseg.forward(swap_face_out)[None, None] + + mask = cv2.warpAffine(mask_out[0][0].astype(np.float32), mat, roi_img.shape[:2][::-1]) + mask[mask > 0.2] = 1 + mask = mask[:, :, np.newaxis].astype(np.uint8) + swap_face = swap_face_out[0].transpose((1, 2, 0)).astype(np.float32) + target_face = (face.copy() / 255).astype(np.float32) + + if use_gfpgan: + sr_face = self.gfp.forward(swap_face) + if sr_weight != 1.0: + sr_face = cv2.addWeighted(sr_face, sr_weight, swap_face, 1.0 - sr_weight, 0) + if use_color_trans: + transed_face = color_transfer(color_trans_mode, (sr_face + 1) / 2, target_face) + swap_face = (transed_face * 2) - 1 + else: + swap_face = sr_face + elif use_color_trans: + transed_face = 
color_transfer(color_trans_mode, (swap_face + 1) / 2, target_face) + swap_face = (transed_face * 2) - 1 + + swap_face = ((swap_face + 1) / 2) + + frame_out = self.reverse2wholeimage_hifi_trt_roi( + swap_face, mat, mask, + frame, roi_img, roi_box + ) + + return frame_out + +def process_image_video(image, video_path, use_gfpgan, sr_weight, use_color_trans, color_trans_mode): + global processor + initialize_processor() + + src_latent, _ = processor.hi.get_face_feature(image) + image_latent = [src_latent] + + video = cv2.VideoCapture(video_path) + video_fps = video.get(cv2.CAP_PROP_FPS) + video_size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))) + output_dir = 'data/output/' + if not os.path.exists(output_dir): + os.mkdir(output_dir) + swap_video_path = output_dir + 'temp.mp4' + videoWriter = cv2.VideoWriter(swap_video_path, cv2.VideoWriter_fourcc(*'mp4v'), video_fps, video_size) + + with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor: + futures = [] + while True: + ret, frame = video.read() + if not ret: + break + future = executor.submit(processor.process_frame, frame, image_latent, use_gfpgan, sr_weight, + use_color_trans, color_trans_mode) + futures.append(future) + + for future in futures: + processed_frame = future.result() + if processed_frame is not None: + videoWriter.write(processed_frame) + + video.release() + videoWriter.release() + + add_audio_to_video(video_path, swap_video_path) + + return swap_video_path + + +def add_audio_to_video(original_video_path, swapped_video_path): + audio_file_path = original_video_path.split('.')[0] + '.wav' + if not os.path.exists(audio_file_path): + os.system(f'ffmpeg -y -hide_banner -loglevel error -i "{original_video_path}" -f wav -vn "{audio_file_path}"') + + temp_output_path = swapped_video_path.replace('.mp4', '_with_audio.mp4') + os.system( + f'ffmpeg -y -hide_banner -loglevel error -i "{swapped_video_path}" -i "{audio_file_path}" -c:v copy -c:a aac "{temp_output_path}"') + + os.remove(swapped_video_path) + os.rename(temp_output_path, swapped_video_path) + + +# Gradio interface setup +iface = gr.Interface( + fn=process_image_video, + inputs=[ + gr.Image(type="pil", label="Source Image"), + gr.Video(label="Input Video"), + gr.Checkbox(label="Use GFPGAN [Super-Resolution]"), + gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="SR Weight [only support GFPGAN enabled]", value=1.0), + gr.Checkbox(label="Use Color Transfer"), + gr.Dropdown(choices=["rct", "lct", "mkl", "idt", "sot"], + label="Color Transfer Mode [only support Color-Transfer enabled]", value="rct") + ], + outputs=gr.Video(label="Output Video"), + title="Video Generation", + description="Upload an image and a video, and the system will generate a new video based on the input." 
+) + +if __name__ == "__main__": + iface.launch() \ No newline at end of file diff --git a/assets/cam_demo1.gif b/assets/cam_demo1.gif new file mode 100644 index 0000000000000000000000000000000000000000..0e98b5904bb16b77aa2205e8f2181e064468910f --- /dev/null +++ b/assets/cam_demo1.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239cad1722caf9fc14c89627d80e38e549bc940046615c7ba6f58da78ac11f5d +size 1341237 diff --git a/assets/cam_demo2.gif b/assets/cam_demo2.gif new file mode 100644 index 0000000000000000000000000000000000000000..3986cdb3c28b52566f1942a6414f31ef78212fea --- /dev/null +++ b/assets/cam_demo2.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc03b463e4693951750560db273e5565edbf60b4008b6a80e30daac0e2b65aac +size 1311740 diff --git a/assets/demo10.gif b/assets/demo10.gif new file mode 100644 index 0000000000000000000000000000000000000000..93ff3cea2490c6a2989439ce13374a55e292712c --- /dev/null +++ b/assets/demo10.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf6141d4199093824e9786e4c29544f49df24b79cf63dd7535fe40662c376a7 +size 5662354 diff --git a/assets/demo20.gif b/assets/demo20.gif new file mode 100644 index 0000000000000000000000000000000000000000..603906662d62e4444af654dd35e846e33eeda65f --- /dev/null +++ b/assets/demo20.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df9f0d67063073f9874c68a6462f5c060ef91b0822a86660bd32120e63004ff +size 2702437 diff --git a/color_transfer.py b/color_transfer.py new file mode 100644 index 0000000000000000000000000000000000000000..fe354e7e28c0fb20784e8af4ff2eda9b01dfea5d --- /dev/null +++ b/color_transfer.py @@ -0,0 +1,337 @@ +import cv2 +import numexpr as ne +import numpy as np +import scipy as sp +from numpy import linalg as npla + + +def color_transfer_sot(src,trg, steps=10, batch_size=5, reg_sigmaXY=16.0, reg_sigmaV=5.0): + """ + Color Transform via Sliced Optimal Transfer + ported by @iperov from https://github.com/dcoeurjo/OTColorTransfer + + src - any float range any channel image + dst - any float range any channel image, same shape as src + steps - number of solver steps + batch_size - solver batch size + reg_sigmaXY - apply regularization and sigmaXY of filter, otherwise set to 0.0 + reg_sigmaV - sigmaV of filter + + return value - clip it manually + """ + if not np.issubdtype(src.dtype, np.floating): + raise ValueError("src value must be float") + if not np.issubdtype(trg.dtype, np.floating): + raise ValueError("trg value must be float") + + if len(src.shape) != 3: + raise ValueError("src shape must have rank 3 (h,w,c)") + + if src.shape != trg.shape: + raise ValueError("src and trg shapes must be equal") + + src_dtype = src.dtype + h,w,c = src.shape + new_src = src.copy() + + advect = np.empty ( (h*w,c), dtype=src_dtype ) + for step in range (steps): + advect.fill(0) + for batch in range (batch_size): + dir = np.random.normal(size=c).astype(src_dtype) + dir /= npla.norm(dir) + + projsource = np.sum( new_src*dir, axis=-1).reshape ((h*w)) + projtarget = np.sum( trg*dir, axis=-1).reshape ((h*w)) + + idSource = np.argsort (projsource) + idTarget = np.argsort (projtarget) + + a = projtarget[idTarget]-projsource[idSource] + for i_c in range(c): + advect[idSource,i_c] += a * dir[i_c] + new_src += advect.reshape( (h,w,c) ) / batch_size + + if reg_sigmaXY != 0.0: + src_diff = new_src-src + src_diff_filt = cv2.bilateralFilter (src_diff, 0, reg_sigmaV, reg_sigmaXY ) + if len(src_diff_filt.shape) == 2: + src_diff_filt = 
src_diff_filt[...,None] + new_src = src + src_diff_filt + return new_src + +def color_transfer_mkl(x0, x1): + eps = np.finfo(float).eps + + h,w,c = x0.shape + h1,w1,c1 = x1.shape + + x0 = x0.reshape ( (h*w,c) ) + x1 = x1.reshape ( (h1*w1,c1) ) + + a = np.cov(x0.T) + b = np.cov(x1.T) + + Da2, Ua = np.linalg.eig(a) + Da = np.diag(np.sqrt(Da2.clip(eps, None))) + + C = np.dot(np.dot(np.dot(np.dot(Da, Ua.T), b), Ua), Da) + + Dc2, Uc = np.linalg.eig(C) + Dc = np.diag(np.sqrt(Dc2.clip(eps, None))) + + Da_inv = np.diag(1./(np.diag(Da))) + + t = np.dot(np.dot(np.dot(np.dot(np.dot(np.dot(Ua, Da_inv), Uc), Dc), Uc.T), Da_inv), Ua.T) + + mx0 = np.mean(x0, axis=0) + mx1 = np.mean(x1, axis=0) + + result = np.dot(x0-mx0, t) + mx1 + return np.clip ( result.reshape ( (h,w,c) ).astype(x0.dtype), 0, 1) + +def color_transfer_idt(i0, i1, bins=256, n_rot=20): + import scipy.stats + + relaxation = 1 / n_rot + h,w,c = i0.shape + h1,w1,c1 = i1.shape + + i0 = i0.reshape ( (h*w,c) ) + i1 = i1.reshape ( (h1*w1,c1) ) + + n_dims = c + + d0 = i0.T + d1 = i1.T + + for i in range(n_rot): + + r = sp.stats.special_ortho_group.rvs(n_dims).astype(np.float32) + + d0r = np.dot(r, d0) + d1r = np.dot(r, d1) + d_r = np.empty_like(d0) + + for j in range(n_dims): + + lo = min(d0r[j].min(), d1r[j].min()) + hi = max(d0r[j].max(), d1r[j].max()) + + p0r, edges = np.histogram(d0r[j], bins=bins, range=[lo, hi]) + p1r, _ = np.histogram(d1r[j], bins=bins, range=[lo, hi]) + + cp0r = p0r.cumsum().astype(np.float32) + cp0r /= cp0r[-1] + + cp1r = p1r.cumsum().astype(np.float32) + cp1r /= cp1r[-1] + + f = np.interp(cp0r, cp1r, edges[1:]) + + d_r[j] = np.interp(d0r[j], edges[1:], f, left=0, right=bins) + + d0 = relaxation * np.linalg.solve(r, (d_r - d0r)) + d0 + + return np.clip ( d0.T.reshape ( (h,w,c) ).astype(i0.dtype) , 0, 1) + +def reinhard_color_transfer(target : np.ndarray, source : np.ndarray, target_mask : np.ndarray = None, source_mask : np.ndarray = None, mask_cutoff=0.5) -> np.ndarray: + """ + Transfer color using rct method. 
+ + target np.ndarray H W 3C (BGR) np.float32 + source np.ndarray H W 3C (BGR) np.float32 + + target_mask(None) np.ndarray H W 1C np.float32 + source_mask(None) np.ndarray H W 1C np.float32 + + mask_cutoff(0.5) float + + masks are used to limit the space where color statistics will be computed to adjust the target + + reference: Color Transfer between Images https://www.cs.tau.ac.il/~turkel/imagepapers/ColorTransfer.pdf + """ + source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB) + target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB) + + source_input = source + if source_mask is not None: + source_input = source_input.copy() + source_input[source_mask[...,0] < mask_cutoff] = [0,0,0] + + target_input = target + if target_mask is not None: + target_input = target_input.copy() + target_input[target_mask[...,0] < mask_cutoff] = [0,0,0] + + target_l_mean, target_l_std, target_a_mean, target_a_std, target_b_mean, target_b_std, \ + = target_input[...,0].mean(), target_input[...,0].std(), target_input[...,1].mean(), target_input[...,1].std(), target_input[...,2].mean(), target_input[...,2].std() + + source_l_mean, source_l_std, source_a_mean, source_a_std, source_b_mean, source_b_std, \ + = source_input[...,0].mean(), source_input[...,0].std(), source_input[...,1].mean(), source_input[...,1].std(), source_input[...,2].mean(), source_input[...,2].std() + + # not as in the paper: scale by the standard deviations using reciprocal of paper proposed factor + target_l = target[...,0] + target_l = ne.evaluate('(target_l - target_l_mean) * source_l_std / target_l_std + source_l_mean') + + target_a = target[...,1] + target_a = ne.evaluate('(target_a - target_a_mean) * source_a_std / target_a_std + source_a_mean') + + target_b = target[...,2] + target_b = ne.evaluate('(target_b - target_b_mean) * source_b_std / target_b_std + source_b_mean') + + np.clip(target_l, 0, 100, out=target_l) + np.clip(target_a, -127, 127, out=target_a) + np.clip(target_b, -127, 127, out=target_b) + + return cv2.cvtColor(np.stack([target_l,target_a,target_b], -1), cv2.COLOR_LAB2BGR) + + +def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5): + ''' + Matches the colour distribution of the target image to that of the source image + using a linear transform. + Images are expected to be of form (w,h,c) and float in [0,1]. + Modes are chol, pca or sym for different choices of basis. 
+ ''' + mu_t = target_img.mean(0).mean(0) + t = target_img - mu_t + t = t.transpose(2,0,1).reshape( t.shape[-1],-1) + t = t.reshape( t.shape[-1],-1) + Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0]) + mu_s = source_img.mean(0).mean(0) + s = source_img - mu_s + s = s.transpose(2,0,1).reshape( s.shape[-1],-1) + Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0]) + if mode == 'chol': + chol_t = np.linalg.cholesky(Ct) + chol_s = np.linalg.cholesky(Cs) + ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t) + if mode == 'pca': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + eva_s, eve_s = np.linalg.eigh(Cs) + Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T) + ts = Qs.dot(np.linalg.inv(Qt)).dot(t) + if mode == 'sym': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + Qt_Cs_Qt = Qt.dot(Cs).dot(Qt) + eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt) + QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T) + ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t) + matched_img = ts.reshape(*target_img.transpose(2,0,1).shape).transpose(1,2,0) + matched_img += mu_s + matched_img[matched_img>1] = 1 + matched_img[matched_img<0] = 0 + return np.clip(matched_img.astype(source_img.dtype), 0, 1) + +def lab_image_stats(image): + # compute the mean and standard deviation of each channel + (l, a, b) = cv2.split(image) + (lMean, lStd) = (l.mean(), l.std()) + (aMean, aStd) = (a.mean(), a.std()) + (bMean, bStd) = (b.mean(), b.std()) + + # return the color statistics + return (lMean, lStd, aMean, aStd, bMean, bStd) + +def _scale_array(arr, clip=True): + if clip: + return np.clip(arr, 0, 255) + + mn = arr.min() + mx = arr.max() + scale_range = (max([mn, 0]), min([mx, 255])) + + if mn < scale_range[0] or mx > scale_range[1]: + return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0] + + return arr + +def channel_hist_match(source, template, hist_match_threshold=255, mask=None): + # Code borrowed from: + # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x + masked_source = source + masked_template = template + + if mask is not None: + masked_source = source * mask + masked_template = template * mask + + oldshape = source.shape + source = source.ravel() + template = template.ravel() + masked_source = masked_source.ravel() + masked_template = masked_template.ravel() + s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, + return_counts=True) + t_values, t_counts = np.unique(template, return_counts=True) + + s_quantiles = np.cumsum(s_counts).astype(np.float64) + s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1] + t_quantiles = np.cumsum(t_counts).astype(np.float64) + t_quantiles = 255 * t_quantiles / t_quantiles[-1] + interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) + + return interp_t_values[bin_idx].reshape(oldshape) + +def color_hist_match(src_im, tar_im, hist_match_threshold=255): + h,w,c = src_im.shape + matched_R = channel_hist_match(src_im[:,:,0], tar_im[:,:,0], hist_match_threshold, None) + matched_G = channel_hist_match(src_im[:,:,1], tar_im[:,:,1], hist_match_threshold, None) + matched_B = channel_hist_match(src_im[:,:,2], tar_im[:,:,2], hist_match_threshold, None) + + to_stack = (matched_R, matched_G, matched_B) + for i in range(3, c): + to_stack += ( src_im[:,:,i],) + + + matched = np.stack(to_stack, axis=-1).astype(src_im.dtype) + return matched + +def 
color_transfer_mix(img_src,img_trg): + img_src = np.clip(img_src*255.0, 0, 255).astype(np.uint8) + img_trg = np.clip(img_trg*255.0, 0, 255).astype(np.uint8) + + img_src_lab = cv2.cvtColor(img_src, cv2.COLOR_BGR2LAB) + img_trg_lab = cv2.cvtColor(img_trg, cv2.COLOR_BGR2LAB) + + rct_light = np.clip ( linear_color_transfer(img_src_lab[...,0:1].astype(np.float32)/255.0, + img_trg_lab[...,0:1].astype(np.float32)/255.0 )[...,0]*255.0, + 0, 255).astype(np.uint8) + + img_src_lab[...,0] = (np.ones_like (rct_light)*100).astype(np.uint8) + img_src_lab = cv2.cvtColor(img_src_lab, cv2.COLOR_LAB2BGR) + + img_trg_lab[...,0] = (np.ones_like (rct_light)*100).astype(np.uint8) + img_trg_lab = cv2.cvtColor(img_trg_lab, cv2.COLOR_LAB2BGR) + + img_rct = color_transfer_sot( img_src_lab.astype(np.float32), img_trg_lab.astype(np.float32) ) + img_rct = np.clip(img_rct, 0, 255).astype(np.uint8) + + img_rct = cv2.cvtColor(img_rct, cv2.COLOR_BGR2LAB) + img_rct[...,0] = rct_light + img_rct = cv2.cvtColor(img_rct, cv2.COLOR_LAB2BGR) + + + return (img_rct / 255.0).astype(np.float32) + +def color_transfer(ct_mode, img_src, img_trg): + """ + color transfer for [0,1] float32 inputs + """ + if ct_mode == 'lct': + out = linear_color_transfer(img_src, img_trg) + elif ct_mode == 'rct': + out = reinhard_color_transfer(img_src, img_trg) + elif ct_mode == 'mkl': + out = color_transfer_mkl(img_src, img_trg) + elif ct_mode == 'idt': + out = color_transfer_idt(img_src, img_trg) + elif ct_mode == 'sot': + out = color_transfer_sot(img_src, img_trg) + out = np.clip( out, 0.0, 1.0) + else: + raise ValueError(f"unknown ct_mode {ct_mode}") + return out diff --git a/data/image_feature_dict.pkl b/data/image_feature_dict.pkl new file mode 100644 index 0000000000000000000000000000000000000000..40e763faa267876bc8c51a102056f09ec60adc8b --- /dev/null +++ b/data/image_feature_dict.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c5871c89e526e5c088cbee5db03b87135c27be4d985cfcd78c8ce02a4af482 +size 3975088 diff --git a/data/source/demo.mp4 b/data/source/demo.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5446b51c12abda0e7aad9d958c8f9e18c25f471d --- /dev/null +++ b/data/source/demo.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:754d104f06af6d80da356c8311bbcabf1f3fd467cebd98a8b658d9e94f7507b8 +size 2402911 diff --git a/data/source/elon-musk1.jpg b/data/source/elon-musk1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ecfe636ebbd3226493f35b05290f374b5c4a89e6 Binary files /dev/null and b/data/source/elon-musk1.jpg differ diff --git a/face_detect/FaceType.py b/face_detect/FaceType.py new file mode 100644 index 0000000000000000000000000000000000000000..745cff320ee177bc4865a1c417a30e464f543e78 --- /dev/null +++ b/face_detect/FaceType.py @@ -0,0 +1,37 @@ +from enum import IntEnum + +class FaceType(IntEnum): + #enumerating in order "next contains prev" + HALF = 0 + MID_FULL = 1 + FULL = 2 + FULL_NO_ALIGN = 3 + WHOLE_FACE = 4 + HEAD = 10 + HEAD_NO_ALIGN = 20 + + MARK_ONLY = 100, #no align at all, just embedded faceinfo + + @staticmethod + def fromString (s): + r = from_string_dict.get (s.lower()) + if r is None: + raise Exception ('FaceType.fromString value error') + return r + + @staticmethod + def toString (face_type): + return to_string_dict[face_type] + +to_string_dict = { FaceType.HALF : 'half_face', + FaceType.MID_FULL : 'midfull_face', + FaceType.FULL : 'full_face', + FaceType.FULL_NO_ALIGN : 'full_face_no_align', + FaceType.WHOLE_FACE : 
'whole_face', + FaceType.HEAD : 'head', + FaceType.HEAD_NO_ALIGN : 'head_no_align', + + FaceType.MARK_ONLY :'mark_only', + } + +from_string_dict = { to_string_dict[x] : x for x in to_string_dict.keys() } \ No newline at end of file diff --git a/face_detect/LandmarksProcessor.py b/face_detect/LandmarksProcessor.py new file mode 100644 index 0000000000000000000000000000000000000000..16f601a31d3155553dc787b41c05367dfcf9c3fb --- /dev/null +++ b/face_detect/LandmarksProcessor.py @@ -0,0 +1,1482 @@ +import colorsys +import math +from enum import IntEnum + +import cv2 +import numpy as np +import numpy.linalg as npla + +from face_detect.core import imagelib +from face_detect.core import mathlib +from face_detect.core.mathlib.umeyama import umeyama +from face_detect.FaceType import FaceType + +mesh_33=[70,63,105,66,107,336,296,334,293,300,168,197,5,4,240,99,2,328,460,33,160,158,133,153,144,362,385,387,263,373,380,57,287] +landmarks_2D_4=np.array([ +[0.224152 , 0.2119465], #left iris mean 37 38 40 41 +[0.75610125, 0.2119465],#right iris mean 43 44 46 47 +[0.490127, 0.515625], # nose 30 +[0.4901265, 0.780233] #mouth mean 48 54 +]) +landmarks_2D_4_bottom=np.array([ +[0.2218305, 0.244588 ], #left iris mean 40 41 +[0.7584225, 0.244588],#right iris mean 46 47 +[0.490127, 0.515625], # nose 30 +[0.4901265, 0.780233] #mouth mean 48 54 +]) +landmarks_2D = np.array([ + [0.000213256, 0.106454], # 17 + [0.0752622, 0.038915], # 18 + [0.18113, 0.0187482], # 19 + [0.29077, 0.0344891], # 20 + [0.393397, 0.0773906], # 21 + [0.586856, 0.0773906], # 22 + [0.689483, 0.0344891], # 23 + [0.799124, 0.0187482], # 24 + [0.904991, 0.038915], # 25 + [0.98004, 0.106454], # 26 + [0.490127, 0.203352], # 27 + [0.490127, 0.307009], # 28 + [0.490127, 0.409805], # 29 + [0.490127, 0.515625], # 30 + [0.36688, 0.587326], # 31 + [0.426036, 0.609345], # 32 + [0.490127, 0.628106], # 33 + [0.554217, 0.609345], # 34 + [0.613373, 0.587326], # 35 + [0.121737, 0.216423], # 36 + [0.187122, 0.178758], # 37 + [0.265825, 0.179852], # 38 + [0.334606, 0.231733], # 39 + [0.260918, 0.245099], # 40 + [0.182743, 0.244077], # 41 + [0.645647, 0.231733], # 42 + [0.714428, 0.179852], # 43 + [0.793132, 0.178758], # 44 + [0.858516, 0.216423], # 45 + [0.79751, 0.244077], # 46 + [0.719335, 0.245099], # 47 + [0.254149, 0.780233], # 48 + [0.340985, 0.745405], # 49 + [0.428858, 0.727388], # 50 + [0.490127, 0.742578], # 51 + [0.551395, 0.727388], # 52 + [0.639268, 0.745405], # 53 + [0.726104, 0.780233], # 54 + [0.642159, 0.864805], # 55 + [0.556721, 0.902192], # 56 + [0.490127, 0.909281], # 57 + [0.423532, 0.902192], # 58 + [0.338094, 0.864805], # 59 + [0.290379, 0.784792], # 60 + [0.428096, 0.778746], # 61 + [0.490127, 0.785343], # 62 + [0.552157, 0.778746], # 63 + [0.689874, 0.784792], # 64 + [0.553364, 0.824182], # 65 + [0.490127, 0.831803], # 66 + [0.42689, 0.824182] # 67 +], dtype=np.float32) + +landmarks_2D_new = np.array([ + [0.000213256, 0.106454], # 17 + [0.0752622, 0.038915], # 18 + [0.18113, 0.0187482], # 19 + [0.29077, 0.0344891], # 20 + [0.393397, 0.0773906], # 21 + [0.586856, 0.0773906], # 22 + [0.689483, 0.0344891], # 23 + [0.799124, 0.0187482], # 24 + [0.904991, 0.038915], # 25 + [0.98004, 0.106454], # 26 + [0.490127, 0.203352], # 27 + [0.490127, 0.307009], # 28 + [0.490127, 0.409805], # 29 + [0.490127, 0.515625], # 30 + [0.36688, 0.587326], # 31 + [0.426036, 0.609345], # 32 + [0.490127, 0.628106], # 33 + [0.554217, 0.609345], # 34 + [0.613373, 0.587326], # 35 + [0.121737, 0.216423], # 36 + [0.187122, 0.178758], # 37 + [0.265825, 0.179852], # 38 + 
[0.334606, 0.231733], # 39 + [0.260918, 0.245099], # 40 + [0.182743, 0.244077], # 41 + [0.645647, 0.231733], # 42 + [0.714428, 0.179852], # 43 + [0.793132, 0.178758], # 44 + [0.858516, 0.216423], # 45 + [0.79751, 0.244077], # 46 + [0.719335, 0.245099], # 47 + [0.254149, 0.780233], # 48 + [0.726104, 0.780233], # 54 +], dtype=np.float32) +landmarks_2D_new_mesh = np.array([ +[ 0.000213256, 0.106454 ], #17 +[ 0.0752622, 0.038915 ], #18 +[0.1281961, 0.0288316], #19[ 0.18113, 0.0187482 ] +[ 0.29077, 0.0144891 ], #20 +[ 0.393397, 0.0773906 ], #21 +[ 0.586856, 0.0773906 ], #22 +[ 0.689483, 0.0144891 ], #23 +[0.8520575, 0.0288316], #24[ 0.799124, 0.0187482 ] +[ 0.904991, 0.038915 ], #25 +[ 0.98004, 0.106454 ], #26 +[ 0.490127, 0.203352 ], #27 +[ 0.490127, 0.307009 ], #28 +[ 0.490127, 0.409805 ], #29 +[ 0.490127, 0.515625 ], #30 +[0.396458 , 0.5983355], #31 [ 0.36688, 0.587326 ] +[ 0.426036, 0.609345 ], #32 +[ 0.490127, 0.628106 ], #33 +[ 0.554217, 0.609345 ], #34 +[ 0.613373, 0.587326 ], #35 +[ 0.071737, 0.136423 ], #36 +[ 0.137122, 0.118758 ], #37 +[ 0.215825, 0.119852 ], #38 +[ 0.334606, 0.151733 ], #39 +[ 0.210918, 0.165099 ], #40 +[ 0.132743, 0.164077 ], #41 +[ 0.645647, 0.151733 ], #42 +[ 0.764428, 0.119852 ], #43 +[ 0.743132, 0.118758 ], #44 +[ 0.908516, 0.136423 ], #45 +[ 0.84751, 0.164077 ], #46 +[ 0.769335, 0.165099 ], #47 +[ 0.254149, 0.780233 ], #48 +[ 0.726104, 0.780233 ], #54 +], dtype=np.float32) + +# landmarks_468_moving_parts_indexes = [0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 46, 52, 53, 54, 55, 56, 57, 58, 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 95, 96, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 117, 118, 124, 130, 132, 133, 135, 136, 138, 139, 140, 143, 144, 145, 146, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 189, 190, 191, 192, 193, 194, 199, 200, 201, 202, 204, 208, 210, 211, 212, 213, 214, 215, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 243, 244, 245, 246, 247, 249, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 267, 268, 269, 270, 271, 272, 273, 276, 282, 283, 284, 285, 286, 287, 288, 291, 292, 293, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 306, 307, 308, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 324, 325, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 346, 347, 353, 359, 361, 362, 364, 365, 367, 368, 369, 372, 373, 374, 375, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 394, 395, 396, 397, 398, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 413, 414, 415, 416, 417, 418, 421, 422, 424, 428, 430, 431, 432, 433, 434, 435, 441, 442, 443, 444, 445, 446, 448, 449, 450, 451, 452, 453, 463, 464, 465, 466, 467] +# uni_landmarks_468 = np.array( +# [[ 0.49066195, 0.7133885 ], +# [ 0.49042386, 0.52723485], +# [ 0.49050152, 0.6244965 ], +# [ 0.45844677, 0.39348277], +# [ 0.4905825 , 0.49120593], +# [ 0.49006602, 0.43998772], +# [ 0.48907965, 0.26775706], +# [ 0.11721139, 0.23243594], +# [ 0.48957095, 0.11063451], +# [ 0.48949632, 0.03535742], +# [ 0.48905632, -0.25326234], +# [ 0.4907858 , 0.73766613], +# [ 0.49081355, 0.7606857 ], +# [ 0.4908666 , 0.7839426 ], +# [ 0.49079415, 0.78913504], +# [ 0.4908271 , 0.80801845], +# [ 0.49086872, 0.831855 ], +# [ 0.49092326, 0.8631041 ], +# [ 
0.49104446, 0.94170016], +# [ 0.49009967, 0.5546924 ], +# [ 0.44398275, 0.5741402 ], +# [-0.2106727 , 0.00861922], +# [ 0.2523662 , 0.2832579 ], +# [ 0.2042254 , 0.28945392], +# [ 0.1552372 , 0.28322184], +# [ 0.09056008, 0.24730967], +# [ 0.30096018, 0.27277085], +# [ 0.21548809, 0.16713436], +# [ 0.2595488 , 0.17071684], +# [ 0.16957955, 0.17298089], +# [ 0.13164258, 0.18425746], +# [ 0.043018 , 0.28581 ], +# [ 0.30856833, 1.0507976 ], +# [ 0.10015843, 0.22331452], +# [-0.20773543, 0.26701325], +# [-0.02414621, 0.25144747], +# [ 0.23481508, 0.5045001 ], +# [ 0.44063616, 0.7097012 ], +# [ 0.4449884 , 0.762481 ], +# [ 0.3840104 , 0.7218947 ], +# [ 0.33943903, 0.73847425], +# [ 0.40284824, 0.76374006], +# [ 0.36457124, 0.76704985], +# [ 0.26937196, 0.84716266], +# [ 0.46683946, 0.5275276 ], +# [ 0.4642676 , 0.49167544], +# [ 0.06039319, 0.11509081], +# [ 0.31504983, 0.36394927], +# [ 0.3660137 , 0.52945083], +# [ 0.3509634 , 0.50311893], +# [ 0.09496811, 0.5005815 ], +# [ 0.46075967, 0.4424029 ], +# [ 0.20108324, 0.05883435], +# [ 0.12877828, 0.07731954], +# [-0.09675749, -0.09848522], +# [ 0.39672711, 0.09345116], +# [ 0.29908365, 0.18449144], +# [ 0.23298171, 0.7922538 ], +# [-0.27583498, 0.85219014], +# [ 0.38898414, 0.5723152 ], +# [ 0.41446668, 0.59347576], +# [ 0.28167963, 0.7884952 ], +# [ 0.30013445, 0.7875627 ], +# [ 0.09448256, 0.03961415], +# [ 0.3531811 , 0.5553779 ], +# [ 0.2873921 , 0.05599196], +# [ 0.28232294, 0.01076962], +# [ 0.1903341 , -0.23029903], +# [ 0.0108011 , -0.03099815], +# [ 0.24915197, -0.10741784], +# [ 0.01047484, 0.08868673], +# [-0.08942058, 0.05201372], +# [ 0.44268388, 0.7376863 ], +# [ 0.39652622, 0.741894 ], +# [ 0.35389552, 0.7514722 ], +# [ 0.393559 , 0.5851372 ], +# [ 0.2925385 , 0.7871472 ], +# [ 0.31904542, 0.80939215], +# [ 0.32005206, 0.787085 ], +# [ 0.4195982 , 0.5444628 ], +# [ 0.3688312 , 0.78418756], +# [ 0.40608776, 0.7841225 ], +# [ 0.4472093 , 0.78405076], +# [ 0.43053833, 0.9379409 ], +# [ 0.44192585, 0.8617842 ], +# [ 0.44321233, 0.82923037], +# [ 0.4432334 , 0.80578357], +# [ 0.44304678, 0.78921837], +# [ 0.36314115, 0.7893578 ], +# [ 0.36057413, 0.8040033 ], +# [ 0.35472178, 0.8187327 ], +# [ 0.34614718, 0.83330894], +# [ 0.2959003 , 0.69076014], +# [-0.37090415, 0.5509728 ], +# [ 0.4903264 , 0.5851119 ], +# [ 0.3370172 , 0.78961957], +# [ 0.33070365, 0.8010128 ], +# [ 0.43397966, 0.6231119 ], +# [ 0.35356513, 0.59569615], +# [ 0.42509514, 0.6093918 ], +# [ 0.2635329 , 0.39636588], +# [ 0.19704658, 0.43663597], +# [ 0.33384863, 0.52658314], +# [ 0.03225203, -0.18047164], +# [ 0.11854403, -0.08533629], +# [ 0.18350407, 0.01215954], +# [ 0.31292278, 0.8845064 ], +# [ 0.3862302 , 0.02093028], +# [ 0.36480215, -0.1098879 ], +# [ 0.33342764, -0.2497105 ], +# [ 0.11592615, 0.2646692 ], +# [-0.00803981, 0.3294946 ], +# [ 0.33535972, 0.26431814], +# [ 0.05940344, 0.18766014], +# [ 0.36188984, 0.33336782], +# [ 0.39879864, 0.50869733], +# [-0.07952328, 0.36885905], +# [ 0.04230375, 0.36800843], +# [ 0.11137532, 0.3864613 ], +# [ 0.19386435, 0.37397826], +# [ 0.25749052, 0.34993485], +# [ 0.310977 , 0.3240539 ], +# [ 0.44813582, 0.2762354 ], +# [-0.06039021, 0.4864401 ], +# [ 0.00945808, 0.17624807], +# [ 0.4739895 , 0.55369264], +# [ 0.32125092, 0.4170324 ], +# [-0.36162117, 0.27013144], +# [ 0.3592803 , 0.3023075 ], +# [ 0.30784345, 0.529875 ], +# [ 0.07601253, 0.22579695], +# [ 0.3824061 , 0.47686696], +# [-0.33810768, 0.70034444], +# [ 0.34643772, 0.24336138], +# [ 0.42429656, 0.45338264], +# [ 0.02854156, 0.939626 ], +# [-0.04352415, 
1.0322431 ], +# [-0.20510256, 0.51651907], +# [-0.06969981, 0.8698207 ], +# [-0.1581445 , 0.14948419], +# [ 0.2889787 , 1.1224228 ], +# [ 0.47446907, 0.58377683], +# [ 0.2818322 , 0.4586393 ], +# [-0.08708218, 0.2627534 ], +# [ 0.16877942, 0.25976214], +# [ 0.21234928, 0.267416 ], +# [ 0.30676025, 0.81592965], +# [-0.06259334, 0.6009466 ], +# [ 0.36930662, 1.2302231 ], +# [ 0.17070079, 1.149443 ], +# [ 0.07714309, 1.0989524 ], +# [ 0.48931465, -0.1052461 ], +# [ 0.49159575, 1.2484183 ], +# [ 0.2527582 , 0.26420003], +# [ 0.30066028, 0.25829503], +# [ 0.3310663 , 0.25034374], +# [-0.05075949, 0.16421606], +# [ 0.29250854, 0.19938153], +# [ 0.2522571 , 0.18826446], +# [ 0.21220936, 0.18724632], +# [ 0.16866222, 0.19260857], +# [ 0.13789575, 0.2011967 ], +# [-0.29335994, 0.12383505], +# [ 0.1379709 , 0.24424627], +# [ 0.49057597, 0.65296 ], +# [ 0.34147182, 0.663431 ], +# [ 0.3941785 , 0.5603462 ], +# [ 0.43007633, 0.6569765 ], +# [ 0.48963526, 0.17996965], +# [ 0.11681002, 1.0107123 ], +# [ 0.19942053, 1.068824 ], +# [ 0.38605705, 1.1563928 ], +# [-0.16756529, 0.9615808 ], +# [ 0.32817602, 0.21989337], +# [ 0.41141313, 0.3578073 ], +# [ 0.49127796, 1.1678538 ], +# [ 0.27080515, 1.195178 ], +# [-0.19307071, 0.6481067 ], +# [ 0.399859 , 0.7892937 ], +# [ 0.39875022, 0.80587196], +# [ 0.39717573, 0.8256797 ], +# [ 0.3931817 , 0.85224336], +# [ 0.3670306 , 0.9161113 ], +# [ 0.3256227 , 0.7724022 ], +# [ 0.31488904, 0.76426226], +# [ 0.3001029 , 0.7583232 ], +# [ 0.2565659 , 0.73397243], +# [ 0.0438394 , 0.6234349 ], +# [ 0.40628996, 0.30296788], +# [ 0.37707803, 0.19498621], +# [ 0.34125936, 0.21069102], +# [ 0.33733743, 0.7842425 ], +# [ 0.00882016, 0.769232 ], +# [ 0.4335431 , 0.1821002 ], +# [ 0.33409703, 0.9826546 ], +# [ 0.49011812, 0.3896104 ], +# [ 0.45311242, 0.34152514], +# [ 0.4899982 , 0.33611432], +# [ 0.369907 , 0.43193236], +# [ 0.49116373, 1.0932964 ], +# [ 0.49107185, 1.0132186 ], +# [ 0.41421878, 1.008873 ], +# [ 0.21551576, 0.8785059 ], +# [ 0.27587482, 0.57461077], +# [ 0.2683325 , 0.9399872 ], +# [ 0.17091931, 0.56899554], +# [ 0.23741819, 0.6283017 ], +# [ 0.12783033, 0.65916985], +# [ 0.39875996, 1.0855893 ], +# [ 0.33251646, 0.45881665], +# [ 0.16138549, 0.93153137], +# [ 0.23269826, 0.99740875], +# [ 0.17994387, 0.8051213 ], +# [-0.06026869, 0.7033027 ], +# [ 0.10063827, 0.8241594 ], +# [-0.15810522, 0.7679798 ], +# [ 0.2014156 , 0.7000692 ], +# [ 0.365875 , 0.3839739 ], +# [ 0.4115726 , 0.5293855 ], +# [ 0.378973 , 0.5476473 ], +# [ 0.43235463, 0.49621448], +# [ 0.3385827 , 0.15134089], +# [ 0.27179635, 0.12940899], +# [ 0.21341887, 0.12485553], +# [ 0.15807948, 0.12881717], +# [ 0.10610204, 0.14814937], +# [ 0.03133116, 0.236169 ], +# [-0.21341309, 0.38895622], +# [ 0.07818349, 0.3101151 ], +# [ 0.1318462 , 0.32528982], +# [ 0.19485526, 0.32642388], +# [ 0.25329807, 0.31256682], +# [ 0.30569646, 0.29578218], +# [ 0.34839994, 0.2842457 ], +# [-0.3824783 , 0.41054142], +# [ 0.37162504, 0.5664833 ], +# [ 0.41687053, 0.40615496], +# [ 0.4433516 , 0.5242282 ], +# [ 0.44805393, 0.5562703 ], +# [ 0.43453053, 0.5407472 ], +# [ 0.37351128, 0.58924097], +# [ 0.46121803, 0.55474806], +# [ 0.45942986, 0.5810936 ], +# [ 0.35955238, 0.24802393], +# [ 0.38181108, 0.25985107], +# [ 0.40143687, 0.26679716], +# [ 0.11717269, 0.2102652 ], +# [ 0.0940459 , 0.2016577 ], +# [ 0.5217974 , 0.39331725], +# [ 0.8625129 , 0.23113514], +# [ 0.5369363 , 0.57397795], +# [ 1.1896138 , 0.00617525], +# [ 0.7275363 , 0.28242856], +# [ 0.7756985 , 0.2884565 ], +# [ 0.82466465, 0.28205347], +# [ 
0.88921595, 0.24591576], +# [ 0.6788919 , 0.27210945], +# [ 0.7640089 , 0.166177 ], +# [ 0.7199609 , 0.16991326], +# [ 0.8099376 , 0.17186326], +# [ 0.8479136 , 0.18300733], +# [ 0.9368992 , 0.28424102], +# [ 0.67367214, 1.0503516 ], +# [ 0.8795338 , 0.22195426], +# [ 1.1875838 , 0.26458502], +# [ 1.0039485 , 0.24965489], +# [ 0.74551606, 0.50375396], +# [ 0.54075617, 0.7095265 ], +# [ 0.5365969 , 0.76231945], +# [ 0.59742403, 0.7215222 ], +# [ 0.6420548 , 0.7379461 ], +# [ 0.5787324 , 0.7634331 ], +# [ 0.617019 , 0.766611 ], +# [ 0.71218634, 0.8469107 ], +# [ 0.513503 , 0.52683127], +# [ 0.5170686 , 0.49132976], +# [ 0.91894245, 0.11362247], +# [ 0.66487545, 0.36299667], +# [ 0.61502695, 0.52894545], +# [ 0.6296784 , 0.50242335], +# [ 0.88566196, 0.49919614], +# [ 0.5193738 , 0.4423927 ], +# [ 0.7780587 , 0.05788935], +# [ 0.8504331 , 0.07610969], +# [ 1.0753254 , -0.1005309 ], +# [ 0.5824533 , 0.09305263], +# [ 0.6804744 , 0.18382579], +# [ 0.7485537 , 0.79121745], +# [ 1.2577202 , 0.8495136 ], +# [ 0.59192824, 0.57196105], +# [ 0.5665197 , 0.59321034], +# [ 0.6999867 , 0.7877651 ], +# [ 0.6814933 , 0.7868972 ], +# [ 0.8846023 , 0.03829005], +# [ 0.62761134, 0.5547819 ], +# [ 0.6917209 , 0.05532694], +# [ 0.6966465 , 0.01012804], +# [ 0.7876697 , -0.2309872 ], +# [ 0.9680314 , -0.03263693], +# [ 0.7294528 , -0.1080169 ], +# [ 0.96877015, 0.08704082], +# [ 1.0685298 , 0.05000517], +# [ 0.538806 , 0.7375185 ], +# [ 0.5849781 , 0.7415651 ], +# [ 0.62764204, 0.7509944 ], +# [ 0.58739805, 0.5847989 ], +# [ 0.68912315, 0.78645504], +# [ 0.6626941 , 0.8087924 ], +# [ 0.6616096 , 0.7864889 ], +# [ 0.5612171 , 0.5442156 ], +# [ 0.61282057, 0.7837617 ], +# [ 0.575564 , 0.7838267 ], +# [ 0.5344426 , 0.7838985 ], +# [ 0.551505 , 0.93764293], +# [ 0.5399973 , 0.8616131 ], +# [ 0.53859717, 0.8290639 ], +# [ 0.5384943 , 0.8056173 ], +# [ 0.53862303, 0.78905153], +# [ 0.6185288 , 0.78891206], +# [ 0.62114686, 0.8035485 ], +# [ 0.62705064, 0.81825733], +# [ 0.635676 , 0.8328036 ], +# [ 0.6854969 , 0.69067734], +# [ 1.3517375 , 0.54796624], +# [ 0.64465326, 0.78908265], +# [ 0.6510032 , 0.8004538 ], +# [ 0.5471015 , 0.62291807], +# [ 0.62742317, 0.59512955], +# [ 0.55593795, 0.6091671 ], +# [ 0.7161671 , 0.39546603], +# [ 0.7836529 , 0.435396 ], +# [ 0.64694774, 0.5258542 ], +# [ 0.94603044, -0.1820665 ], +# [ 0.86011904, -0.08652072], +# [ 0.79549086, 0.01118712], +# [ 0.66893554, 0.8840338 ], +# [ 0.59274685, 0.02056277], +# [ 0.613851 , -0.11025709], +# [ 0.64526045, -0.25000137], +# [ 0.8639107 , 0.26336375], +# [ 0.9881146 , 0.3277454 ], +# [ 0.6445285 , 0.26371115], +# [ 0.92017305, 0.18616839], +# [ 0.61790556, 0.3323734 ], +# [ 0.58225924, 0.5077285 ], +# [ 1.0597262 , 0.36687428], +# [ 0.93791103, 0.36642405], +# [ 0.86892897, 0.38505408], +# [ 0.78624976, 0.37287512], +# [ 0.7223912 , 0.34902957], +# [ 0.6687594 , 0.32310694], +# [ 0.5315497 , 0.2757726 ], +# [ 1.0409807 , 0.48452145], +# [ 0.9700836 , 0.17458573], +# [ 0.5065989 , 0.55419755], +# [ 0.6590531 , 0.41624966], +# [ 1.3414742 , 0.26715896], +# [ 0.62023264, 0.30108824], +# [ 0.67289865, 0.5290446 ], +# [ 0.9036883 , 0.22435239], +# [ 0.59769833, 0.47659585], +# [ 1.3194624 , 0.6974514 ], +# [ 0.63339525, 0.24286939], +# [ 0.5571053 , 0.45250946], +# [ 0.9535533 , 0.9380257 ], +# [ 1.0260391 , 1.0303764 ], +# [ 1.1858007 , 0.51410204], +# [ 1.0515786 , 0.867869 ], +# [ 1.1375865 , 0.14722979], +# [ 0.6935665 , 1.1218798 ], +# [ 0.5063422 , 0.58382744], +# [ 0.69926125, 0.45745537], +# [ 1.0669235 , 0.26074636], +# [ 0.8110406 , 
0.25864118], +# [ 0.7674977 , 0.26644707], +# [ 0.67500204, 0.81528693], +# [ 1.0435516 , 0.5990178 ], +# [ 0.6121316 , 1.2306852 ], +# [ 0.81222653, 1.1483234 ], +# [ 0.9056057 , 1.0975065 ], +# [ 0.7270778 , 0.26337218], +# [ 0.6791554 , 0.25763443], +# [ 0.6487802 , 0.24975733], +# [ 1.0302606 , 0.16233999], +# [ 0.68710136, 0.19869283], +# [ 0.72731376, 0.18743533], +# [ 0.7673578 , 0.1862774 ], +# [ 0.81092334, 0.1914876 ], +# [ 0.84171957, 0.1999683 ], +# [ 1.2727026 , 0.12110176], +# [ 0.8417947 , 0.24301787], +# [ 0.63978463, 0.6627527 ], +# [ 0.5866921 , 0.5600102 ], +# [ 0.5511283 , 0.6567636 ], +# [ 0.8655194 , 1.009457 ], +# [ 0.78306264, 1.0678959 ], +# [ 0.59620714, 1.1564037 ], +# [ 1.149833 , 0.9592815 ], +# [ 0.65151644, 0.21932903], +# [ 0.56865776, 0.3571483 ], +# [ 0.71228063, 1.1944076 ], +# [ 1.1742088 , 0.6457327 ], +# [ 0.5818109 , 0.78897613], +# [ 0.5829775 , 0.80555046], +# [ 0.5846211 , 0.82535255], +# [ 0.5887078 , 0.8519021 ], +# [ 0.6150045 , 0.916079 ], +# [ 0.65597004, 0.771831 ], +# [ 0.66669285, 0.7636482 ], +# [ 0.6814582 , 0.7576576 ], +# [ 0.7245435 , 0.73241323], +# [ 0.9371713 , 0.62184393], +# [ 0.5736738 , 0.30186948], +# [ 0.60240346, 0.19448838], +# [ 0.6383993 , 0.21017241], +# [ 0.64431435, 0.7837067 ], +# [ 0.9726586 , 0.7675604 ], +# [ 0.54576766, 0.18157108], +# [ 0.6477745 , 0.98230904], +# [ 0.5269076 , 0.34123868], +# [ 0.61068684, 0.43131724], +# [ 0.56792 , 1.0087004 ], +# [ 0.7662271 , 0.8776794 ], +# [ 0.7048996 , 0.57387614], +# [ 0.7136024 , 0.9394351 ], +# [ 0.8097781 , 0.56784695], +# [ 0.7435453 , 0.62753886], +# [ 0.85328954, 0.6578133 ], +# [ 0.5835228 , 1.0854707 ], +# [ 0.64810187, 0.45811343], +# [ 0.82059515, 0.9304676 ], +# [ 0.7494546 , 0.9966611 ], +# [ 0.8015866 , 0.80400985], +# [ 1.0415541 , 0.70138854], +# [ 0.8809724 , 0.8228132 ], +# [ 1.1396528 , 0.7657218 ], +# [ 0.7798614 , 0.69881856], +# [ 0.6143189 , 0.383193 ], +# [ 0.56934875, 0.52867246], +# [ 0.60162777, 0.54706186], +# [ 0.5470082 , 0.4963955 ], +# [ 0.6408297 , 0.15073723], +# [ 0.7075675 , 0.12865019], +# [ 0.76593757, 0.12391254], +# [ 0.8212976 , 0.12768434], +# [ 0.87334216, 0.14682971], +# [ 0.948411 , 0.23457018], +# [ 1.1936799 , 0.38651106], +# [ 0.90181875, 0.30865455], +# [ 0.84818983, 0.3240165 ], +# [ 0.7851249 , 0.32537246], +# [ 0.72658616, 0.3116911 ], +# [ 0.6740513 , 0.2949461 ], +# [ 0.63111407, 0.28325075], +# [ 1.362823 , 0.4074953 ], +# [ 0.60951644, 0.5658945 ], +# [ 0.5634702 , 0.4055624 ], +# [ 0.5374476 , 0.5247268 ], +# [ 0.53280455, 0.5561224 ], +# [ 0.5462737 , 0.5405522 ], +# [ 0.6075077 , 0.58877414], +# [ 0.51933056, 0.55477065], +# [ 0.52143395, 0.58103496], +# [ 0.62030756, 0.24758299], +# [ 0.59746987, 0.2574137 ], +# [ 0.5780933 , 0.2652785 ], +# [ 0.8624742 , 0.2089644 ], +# [ 0.8855709 , 0.20027623]], dtype=np.float32) + +# mesh_33 = np.arange(468) +# mask = np.ones(len(mesh_33), dtype=bool) +# mask[landmarks_468_moving_parts_indexes]=False +# mesh_33=mesh_33[mask,...] +# landmarks_2D_new_mesh=uni_landmarks_468[mask,...] 
+# mouth_center_landmarks_2D = np.array([ +# [-4.4202591e-07, 4.4916576e-01], # 48 +# [1.8399176e-01, 3.7537053e-01], # 49 +# [3.7018123e-01, 3.3719531e-01], # 50 +# [5.0000089e-01, 3.6938059e-01], # 51 +# [6.2981832e-01, 3.3719531e-01], # 52 +# [8.1600773e-01, 3.7537053e-01], # 53 +# [1.0000000e+00, 4.4916576e-01], # 54 +# [8.2213330e-01, 6.2836081e-01], # 55 +# [6.4110327e-01, 7.0757812e-01], # 56 +# [5.0000089e-01, 7.2259867e-01], # 57 +# [3.5889623e-01, 7.0757812e-01], # 58 +# [1.7786618e-01, 6.2836081e-01], # 59 +# [7.6765373e-02, 4.5882553e-01], # 60 +# [3.6856663e-01, 4.4601500e-01], # 61 +# [5.0000089e-01, 4.5999300e-01], # 62 +# [6.3143289e-01, 4.4601500e-01], # 63 +# [9.2323411e-01, 4.5882553e-01], # 64 +# [6.3399029e-01, 5.4228687e-01], # 65 +# [5.0000089e-01, 5.5843467e-01], # 66 +# [3.6601129e-01, 5.4228687e-01] # 67 +# ], dtype=np.float32) + +# 68 point landmark definitions +landmarks_68_pt = {"mouth": (48, 68), + "right_eyebrow": (17, 22), + "left_eyebrow": (22, 27), + "right_eye": (36, 42), + "left_eye": (42, 48), + "nose": (27, 36), # missed one point + "jaw": (0, 17)} + +landmarks_68_3D = np.array([ + [-73.393523, -29.801432, 47.667532], # 00 + [-72.775014, -10.949766, 45.909403], # 01 + [-70.533638, 7.929818, 44.842580], # 02 + [-66.850058, 26.074280, 43.141114], # 03 + [-59.790187, 42.564390, 38.635298], # 04 + [-48.368973, 56.481080, 30.750622], # 05 + [-34.121101, 67.246992, 18.456453], # 06 + [-17.875411, 75.056892, 3.609035], # 07 + [0.098749, 77.061286, -0.881698], # 08 + [17.477031, 74.758448, 5.181201], # 09 + [32.648966, 66.929021, 19.176563], # 10 + [46.372358, 56.311389, 30.770570], # 11 + [57.343480, 42.419126, 37.628629], # 12 + [64.388482, 25.455880, 40.886309], # 13 + [68.212038, 6.990805, 42.281449], # 14 + [70.486405, -11.666193, 44.142567], # 15 + [71.375822, -30.365191, 47.140426], # 16 + [-61.119406, -49.361602, 14.254422], # 17 + [-51.287588, -58.769795, 7.268147], # 18 + [-37.804800, -61.996155, 0.442051], # 19 + [-24.022754, -61.033399, -6.606501], # 20 + [-11.635713, -56.686759, -11.967398], # 21 + [12.056636, -57.391033, -12.051204], # 22 + [25.106256, -61.902186, -7.315098], # 23 + [38.338588, -62.777713, -1.022953], # 24 + [51.191007, -59.302347, 5.349435], # 25 + [60.053851, -50.190255, 11.615746], # 26 + [0.653940, -42.193790, -13.380835], # 27 + [0.804809, -30.993721, -21.150853], # 28 + [0.992204, -19.944596, -29.284036], # 29 + [1.226783, -8.414541, -36.948060], # 00 + [-14.772472, 2.598255, -20.132003], # 01 + [-7.180239, 4.751589, -23.536684], # 02 + [0.555920, 6.562900, -25.944448], # 03 + [8.272499, 4.661005, -23.695741], # 04 + [15.214351, 2.643046, -20.858157], # 05 + [-46.047290, -37.471411, 7.037989], # 06 + [-37.674688, -42.730510, 3.021217], # 07 + [-27.883856, -42.711517, 1.353629], # 08 + [-19.648268, -36.754742, -0.111088], # 09 + [-28.272965, -35.134493, -0.147273], # 10 + [-38.082418, -34.919043, 1.476612], # 11 + [19.265868, -37.032306, -0.665746], # 12 + [27.894191, -43.342445, 0.247660], # 13 + [37.437529, -43.110822, 1.696435], # 14 + [45.170805, -38.086515, 4.894163], # 15 + [38.196454, -35.532024, 0.282961], # 16 + [28.764989, -35.484289, -1.172675], # 17 + [-28.916267, 28.612716, -2.240310], # 18 + [-17.533194, 22.172187, -15.934335], # 19 + [-6.684590, 19.029051, -22.611355], # 20 + [0.381001, 20.721118, -23.748437], # 21 + [8.375443, 19.035460, -22.721995], # 22 + [18.876618, 22.394109, -15.610679], # 23 + [28.794412, 28.079924, -3.217393], # 24 + [19.057574, 36.298248, -14.987997], # 25 + [8.956375, 39.634575, 
-22.554245], # 26 + [0.381549, 40.395647, -23.591626], # 27 + [-7.428895, 39.836405, -22.406106], # 28 + [-18.160634, 36.677899, -15.121907], # 29 + [-24.377490, 28.677771, -4.785684], # 30 + [-6.897633, 25.475976, -20.893742], # 31 + [0.340663, 26.014269, -22.220479], # 32 + [8.444722, 25.326198, -21.025520], # 33 + [24.474473, 28.323008, -5.712776], # 34 + [8.449166, 30.596216, -20.671489], # 35 + [0.205322, 31.408738, -21.903670], # 36 + [-7.198266, 30.844876, -20.328022] # 37 +], dtype=np.float32) + +FaceType_to_padding_remove_align = { + FaceType.HALF: (0.0, False), + FaceType.MID_FULL: (0.0675, False), + FaceType.FULL: (0.2109375, False), + FaceType.FULL_NO_ALIGN: (0.2109375, True), + FaceType.WHOLE_FACE: (0.40, False), + FaceType.HEAD: (0.70, False), + FaceType.HEAD_NO_ALIGN: (0.70, True), +} + + +def convert_98_to_68(lmrks): + # jaw + result = [lmrks[0]] + for i in range(2, 16, 2): + result += [(lmrks[i] + (lmrks[i - 1] + lmrks[i + 1]) / 2) / 2] + result += [lmrks[16]] + for i in range(18, 32, 2): + result += [(lmrks[i] + (lmrks[i - 1] + lmrks[i + 1]) / 2) / 2] + result += [lmrks[32]] + + # eyebrows averaging + result += [lmrks[33], + (lmrks[34] + lmrks[41]) / 2, + (lmrks[35] + lmrks[40]) / 2, + (lmrks[36] + lmrks[39]) / 2, + (lmrks[37] + lmrks[38]) / 2, + ] + + result += [(lmrks[42] + lmrks[50]) / 2, + (lmrks[43] + lmrks[49]) / 2, + (lmrks[44] + lmrks[48]) / 2, + (lmrks[45] + lmrks[47]) / 2, + lmrks[46] + ] + + # nose + result += list(lmrks[51:60]) + + # left eye (from our view) + result += [lmrks[60], + lmrks[61], + lmrks[63], + lmrks[64], + lmrks[65], + lmrks[67]] + + # right eye + result += [lmrks[68], + lmrks[69], + lmrks[71], + lmrks[72], + lmrks[73], + lmrks[75]] + + # mouth + result += list(lmrks[76:96]) + + return np.concatenate(result).reshape((68, 2)) + + +def transform_points(points, mat, invert=False): + if invert: + mat = cv2.invertAffineTransform(mat) + points = np.expand_dims(points, axis=1) + points = cv2.transform(points, mat, points.shape) + points = np.squeeze(points) + return points + + +def get_transform_mat(image_landmarks, output_size, face_type, scale=1.0): + if not isinstance(image_landmarks, np.ndarray): + image_landmarks = np.array(image_landmarks) + + # estimate landmarks transform from global space to local aligned space with bounds [0..1] + mat = umeyama(np.concatenate([image_landmarks[17:49], image_landmarks[54:55]]), landmarks_2D_new, True)[0:2] + + # get corner points in global space + g_p = transform_points(np.float32([(0, 0), (1, 0), (1, 1), (0, 1), (0.5, 0.5)]), mat, True) + g_c = g_p[4] + + # calc diagonal vectors between corners in global space + tb_diag_vec = (g_p[2] - g_p[0]).astype(np.float32) + tb_diag_vec /= npla.norm(tb_diag_vec) + bt_diag_vec = (g_p[1] - g_p[3]).astype(np.float32) + bt_diag_vec /= npla.norm(bt_diag_vec) + + # calc modifier of diagonal vectors for scale and padding value + # print(face_type) + padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0) + mod = (1.0 / scale) * (npla.norm(g_p[0] - g_p[2]) * (padding * np.sqrt(2.0) + 0.5)) + + if face_type == FaceType.WHOLE_FACE: + # adjust vertical offset for WHOLE_FACE, 7% below in order to cover more forehead + vec = (g_p[0] - g_p[3]).astype(np.float32) + vec_len = npla.norm(vec) + vec /= vec_len + g_c += vec * vec_len * 0.07 + + + # calc 3 points in global space to estimate 2d affine transform + if not remove_align: + l_t = np.array([g_c - tb_diag_vec * mod, + g_c + bt_diag_vec * mod, + g_c + tb_diag_vec * mod]) + else: + # remove_align - face will be 
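# Usage sketch for convert_98_to_68() above: it maps a 98-point landmark
# layout (a WFLW-style ordering is assumed) onto the 68-point convention used
# by the rest of this module. Synthetic points only, for illustration:
import numpy as np

lmrks98 = np.random.rand(98, 2).astype(np.float32) * 256   # stand-in detection
lmrks68 = convert_98_to_68(lmrks98)                          # -> (68, 2) array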
centered in the frame but not aligned + l_t = np.array([g_c - tb_diag_vec * mod, + g_c + bt_diag_vec * mod, + g_c + tb_diag_vec * mod, + g_c - bt_diag_vec * mod, + ]) + + # get area of face square in global space + area = mathlib.polygon_area(l_t[:, 0], l_t[:, 1]) + + # calc side of square + side = np.float32(math.sqrt(area) / 2) + + # calc 3 points with unrotated square + l_t = np.array([g_c + [-side, -side], + g_c + [side, -side], + g_c + [side, side]]) + + # calc affine transform from 3 global space points to 3 local space points size of 'output_size' + pts2 = np.float32(((0, 0), (output_size, 0), (output_size, output_size))) + mat = cv2.getAffineTransform(l_t, pts2) + return mat + + +def get_rect_from_landmarks(image_landmarks): + mat = get_transform_mat(image_landmarks, 256, FaceType.FULL_NO_ALIGN) + + g_p = transform_points(np.float32([(0, 0), (255, 255)]), mat, True) + + (l, t, r, b) = g_p[0][0], g_p[0][1], g_p[1][0], g_p[1][1] + + return (l, t, r, b) + +def get_transform_mat_all(image_landmarks,uni_landmarks,output_size,scale=1,gcx=-0.02,gcy=0.15,face_type=FaceType.WHOLE_FACE): + if not isinstance(image_landmarks, np.ndarray): + image_landmarks = np.array (image_landmarks) + # estimate landmarks transform from global space to local aligned space with bounds [0..1] + + mat = umeyama(image_landmarks, uni_landmarks, True)[0:2] + + # get corner points in global space + g_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5) ]) , mat, True) + g_c = g_p[4] + # calc diagonal vectors between corners in global space + + + tb_diag_vec = (g_p[2] - g_p[0]).astype(np.float32) + tb_diag_vec /= npla.norm(tb_diag_vec) + bt_diag_vec = (g_p[1] - g_p[3]).astype(np.float32) + bt_diag_vec /= npla.norm(bt_diag_vec) + + # calc modifier of diagonal vectors for scale and padding value + padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0) + mod = (1.0 / scale) * (npla.norm(g_p[0] - g_p[2]) * (padding * np.sqrt(2.0) + 0.5)) + + vec = (g_p[0]-g_p[3]).astype(np.float32) + vec_len = npla.norm(vec) + vec /= vec_len + g_c += vec*vec_len*[gcx,gcy] + + + # calc 3 points in global space to estimate 2d affine transform + if not remove_align: + l_t = np.array([g_c - tb_diag_vec * mod, + g_c + bt_diag_vec * mod, + g_c + tb_diag_vec * mod]) + else: + # remove_align - face will be centered in the frame but not aligned + l_t = np.array([g_c - tb_diag_vec * mod, + g_c + bt_diag_vec * mod, + g_c + tb_diag_vec * mod, + g_c - bt_diag_vec * mod, + ]) + + # get area of face square in global space + area = mathlib.polygon_area(l_t[:, 0], l_t[:, 1]) + + # calc side of square + side = np.float32(math.sqrt(area) / 2) + + # calc 3 points with unrotated square + l_t = np.array([g_c + [-side, -side], + g_c + [side, -side], + g_c + [side, side]]) + + # calc affine transform from 3 global space points to 3 local space points size of 'output_size' + pts2 = np.float32(((0, 0), (output_size, 0), (output_size, output_size))) + mat = cv2.getAffineTransform(l_t, pts2) + return mat + + +def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0): + + if len(lmrks) != 68: + raise Exception('works only with 68 landmarks') + lmrks = np.array(lmrks.copy(), dtype=np.int) + + # #nose + ml_pnt = (lmrks[36] + lmrks[0]) // 2 + mr_pnt = (lmrks[16] + lmrks[45]) // 2 + + # mid points between the mid points and eye + ql_pnt = (lmrks[36] + ml_pnt) // 2 + qr_pnt = (lmrks[45] + mr_pnt) // 2 + + # Top of the eye arrays + bot_l = np.array((ql_pnt, lmrks[36], lmrks[37], lmrks[38], lmrks[39])) + bot_r = np.array((lmrks[42], 
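# Usage sketch for get_transform_mat(): build an aligned face crop and keep the
# inverse matrix for pasting a processed crop back. Assumes `frame` is a BGR
# uint8 image, `lmrks68` a (68, 2) landmark array, and the FaceType enum that
# this module already imports.
import cv2

def align_face(frame, lmrks68, size=256):
    mat = get_transform_mat(lmrks68, size, FaceType.WHOLE_FACE)
    face = cv2.warpAffine(frame, mat, (size, size), flags=cv2.INTER_LANCZOS4)
    mat_rev = cv2.invertAffineTransform(mat)   # maps the crop back onto `frame`
    return face, mat, mat_rev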
lmrks[43], lmrks[44], lmrks[45], qr_pnt)) + + # Eyebrow arrays + top_l = lmrks[17:22] + top_r = lmrks[22:27] + + # Adjust eyebrow arrays + lmrks[17:22] = top_l + eyebrows_expand_mod * 0.5 * (top_l - bot_l) + lmrks[22:27] = top_r + eyebrows_expand_mod * 0.5 * (top_r - bot_r) + return lmrks + + +def get_image_hull_mask(image_shape, image_landmarks, eyebrows_expand_mod=1.0): + hull_mask = np.zeros(image_shape[0:2] + (1,), dtype=np.float32) + + lmrks = expand_eyebrows(image_landmarks, eyebrows_expand_mod) + + r_jaw = (lmrks[0:9], lmrks[17:18]) + l_jaw = (lmrks[8:17], lmrks[26:27]) + r_cheek = (lmrks[17:20], lmrks[8:9]) + l_cheek = (lmrks[24:27], lmrks[8:9]) + nose_ridge = (lmrks[19:25], lmrks[8:9],) + r_eye = (lmrks[17:22], lmrks[27:28], lmrks[31:36], lmrks[8:9]) + l_eye = (lmrks[22:27], lmrks[27:28], lmrks[31:36], lmrks[8:9]) + nose = (lmrks[27:31], lmrks[31:36]) + parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose] + + for item in parts: + merged = np.concatenate(item) + cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), (1,)) + + return hull_mask + + +def get_image_eye_mask(image_shape, image_landmarks): + if len(image_landmarks) != 68: + raise Exception('get_image_eye_mask works only with 68 landmarks') + + h, w, c = image_shape + + hull_mask = np.zeros((h, w, 1), dtype=np.float32) + + image_landmarks = image_landmarks.astype(np.int) + + cv2.fillConvexPoly(hull_mask, cv2.convexHull(image_landmarks[36:42]), (1,)) + cv2.fillConvexPoly(hull_mask, cv2.convexHull(image_landmarks[42:48]), (1,)) + + dilate = h // 32 + hull_mask = cv2.dilate(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate, dilate)), iterations=1) + + blur = h // 16 + blur = blur + (1 - blur % 2) + hull_mask = cv2.GaussianBlur(hull_mask, (blur, blur), 0) + hull_mask = hull_mask[..., None] + + return hull_mask + + +def get_image_mouth_mask(image_shape, image_landmarks): + if len(image_landmarks) != 68: + raise Exception('get_image_eye_mask works only with 68 landmarks') + + h, w, c = image_shape + + hull_mask = np.zeros((h, w, 1), dtype=np.float32) + + image_landmarks = image_landmarks.astype(np.int) + + cv2.fillConvexPoly(hull_mask, cv2.convexHull(image_landmarks[60:]), (1,)) + + dilate = h // 32 + hull_mask = cv2.dilate(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate, dilate)), iterations=1) + + blur = h // 16 + blur = blur + (1 - blur % 2) + hull_mask = cv2.GaussianBlur(hull_mask, (blur, blur), 0) + hull_mask = hull_mask[..., None] + + return hull_mask + + +def alpha_to_color(img_alpha, color): + if len(img_alpha.shape) == 2: + img_alpha = img_alpha[..., None] + h, w, c = img_alpha.shape + result = np.zeros((h, w, len(color)), dtype=np.float32) + result[:, :] = color + + return result * img_alpha + + +def get_cmask(image_shape, lmrks, eyebrows_expand_mod=1.0): + h, w, c = image_shape + + hull = get_image_hull_mask(image_shape, lmrks, eyebrows_expand_mod) + + result = np.zeros((h, w, 3), dtype=np.float32) + + def process(w, h, data): + d = {} + cur_lc = 0 + all_lines = [] + for s, pts_loop_ar in data: + lines = [] + for pts, loop in pts_loop_ar: + pts_len = len(pts) + lines.append([[pts[i], pts[(i + 1) % pts_len]] for i in range(pts_len - (0 if loop else 1))]) + lines = np.concatenate(lines) + + lc = lines.shape[0] + all_lines.append(lines) + d[s] = cur_lc, cur_lc + lc + cur_lc += lc + all_lines = np.concatenate(all_lines, 0) + + # calculate signed distance for all points and lines + line_count = all_lines.shape[0] + pts_count = w * h + + all_lines = np.repeat(all_lines[None, 
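# Usage sketch for the hull/eye/mouth mask helpers above. Note that these
# helpers rely on the legacy np.int alias, so an older NumPy (< 1.24) is
# assumed; `img` is (H, W, 3) and `lmrks68` a (68, 2) landmark array.
import numpy as np

def build_face_masks(img, lmrks68):
    hull = get_image_hull_mask(img.shape, lmrks68)     # (H, W, 1) full-face hull
    eyes = get_image_eye_mask(img.shape, lmrks68)      # feathered eye regions
    mouth = get_image_mouth_mask(img.shape, lmrks68)   # feathered inner mouth
    return hull, np.clip(eyes + mouth, 0.0, 1.0)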
...], pts_count, axis=0).reshape((pts_count * line_count, 2, 2)) + + pts = np.empty((h, w, line_count, 2), dtype=np.float32) + pts[..., 1] = np.arange(h)[:, None, None] + pts[..., 0] = np.arange(w)[:, None] + pts = pts.reshape((h * w * line_count, -1)) + + a = all_lines[:, 0, :] + b = all_lines[:, 1, :] + pa = pts - a + ba = b - a + ph = np.clip(np.einsum('ij,ij->i', pa, ba) / np.einsum('ij,ij->i', ba, ba), 0, 1) + dists = npla.norm(pa - ba * ph[..., None], axis=1).reshape((h, w, line_count)) + + def get_dists(name, thickness=0): + s, e = d[name] + result = dists[..., s:e] + if thickness != 0: + result = np.abs(result) - thickness + return np.min(result, axis=-1) + + return get_dists + + l_eye = lmrks[42:48] + r_eye = lmrks[36:42] + l_brow = lmrks[22:27] + r_brow = lmrks[17:22] + mouth = lmrks[48:60] + + up_nose = np.concatenate((lmrks[27:31], lmrks[33:34])) + down_nose = lmrks[31:36] + nose = np.concatenate((up_nose, down_nose)) + + gdf = process(w, h, + ( + ('eyes', ((l_eye, True), (r_eye, True))), + ('brows', ((l_brow, False), (r_brow, False))), + ('up_nose', ((up_nose, False),)), + ('down_nose', ((down_nose, False),)), + ('mouth', ((mouth, True),)), + ) + ) + + eyes_fall_dist = w // 32 + eyes_thickness = max(w // 64, 1) + + brows_fall_dist = w // 32 + brows_thickness = max(w // 256, 1) + + nose_fall_dist = w / 12 + nose_thickness = max(w // 96, 1) + + mouth_fall_dist = w // 32 + mouth_thickness = max(w // 64, 1) + + eyes_mask = gdf('eyes', eyes_thickness) + eyes_mask = 1 - np.clip(eyes_mask / eyes_fall_dist, 0, 1) + # eyes_mask = np.clip ( 1- ( np.sqrt( np.maximum(eyes_mask,0) ) / eyes_fall_dist ), 0, 1) + # eyes_mask = np.clip ( 1- ( np.cbrt( np.maximum(eyes_mask,0) ) / eyes_fall_dist ), 0, 1) + + brows_mask = gdf('brows', brows_thickness) + brows_mask = 1 - np.clip(brows_mask / brows_fall_dist, 0, 1) + # brows_mask = np.clip ( 1- ( np.sqrt( np.maximum(brows_mask,0) ) / brows_fall_dist ), 0, 1) + + mouth_mask = gdf('mouth', mouth_thickness) + mouth_mask = 1 - np.clip(mouth_mask / mouth_fall_dist, 0, 1) + + # mouth_mask = np.clip ( 1- ( np.sqrt( np.maximum(mouth_mask,0) ) / mouth_fall_dist ), 0, 1) + + def blend(a, b, k): + x = np.clip(0.5 + 0.5 * (b - a) / k, 0.0, 1.0) + return (a - b) * x + b - k * x * (1.0 - x) + + # nose_mask = (a-b)*x+b - k*x*(1.0-x) + + # nose_mask = np.minimum (up_nose_mask , down_nose_mask ) + # nose_mask = 1-np.clip( nose_mask / nose_fall_dist, 0, 1) + + nose_mask = blend(gdf('up_nose', nose_thickness), gdf('down_nose', nose_thickness), nose_thickness * 3) + nose_mask = 1 - np.clip(nose_mask / nose_fall_dist, 0, 1) + + up_nose_mask = gdf('up_nose', nose_thickness) + up_nose_mask = 1 - np.clip(up_nose_mask / nose_fall_dist, 0, 1) + # up_nose_mask = np.clip ( 1- ( np.cbrt( np.maximum(up_nose_mask,0) ) / nose_fall_dist ), 0, 1) + + down_nose_mask = gdf('down_nose', nose_thickness) + down_nose_mask = 1 - np.clip(down_nose_mask / nose_fall_dist, 0, 1) + # down_nose_mask = np.clip ( 1- ( np.cbrt( np.maximum(down_nose_mask,0) ) / nose_fall_dist ), 0, 1) + + # nose_mask = np.clip( up_nose_mask + down_nose_mask, 0, 1 ) + # nose_mask /= np.max(nose_mask) + # nose_mask = np.maximum (up_nose_mask , down_nose_mask ) + # nose_mask = down_nose_mask + + # nose_mask = np.zeros_like(nose_mask) + + eyes_mask = eyes_mask * (1 - mouth_mask) + nose_mask = nose_mask * (1 - eyes_mask) + + hull_mask = hull[..., 0].copy() + hull_mask = hull_mask * (1 - eyes_mask) * (1 - brows_mask) * (1 - nose_mask) * (1 - mouth_mask) + + # eyes_mask = eyes_mask * (1-nose_mask) + + mouth_mask = 
mouth_mask * (1 - nose_mask) + + brows_mask = brows_mask * (1 - nose_mask) * (1 - eyes_mask) + + hull_mask = alpha_to_color(hull_mask, (0, 1, 0)) + eyes_mask = alpha_to_color(eyes_mask, (1, 0, 0)) + brows_mask = alpha_to_color(brows_mask, (0, 0, 1)) + nose_mask = alpha_to_color(nose_mask, (0, 1, 1)) + mouth_mask = alpha_to_color(mouth_mask, (0, 0, 1)) + + # nose_mask = np.maximum( up_nose_mask, down_nose_mask ) + + result = hull_mask + mouth_mask + nose_mask + brows_mask + eyes_mask + result *= hull + # result = np.clip (result, 0, 1) + return result + + +def blur_image_hull_mask(hull_mask): + maxregion = np.argwhere(hull_mask == 1.0) + miny, minx = maxregion.min(axis=0)[:2] + maxy, maxx = maxregion.max(axis=0)[:2] + lenx = maxx - minx; + leny = maxy - miny; + masky = int(minx + (lenx // 2)) + maskx = int(miny + (leny // 2)) + lowest_len = min(lenx, leny) + ero = int(lowest_len * 0.085) + blur = int(lowest_len * 0.10) + + hull_mask = cv2.erode(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ero, ero)), iterations=1) + hull_mask = cv2.blur(hull_mask, (blur, blur)) + hull_mask = np.expand_dims(hull_mask, -1) + + return hull_mask + + +mirror_idxs = [ + [0, 16], + [1, 15], + [2, 14], + [3, 13], + [4, 12], + [5, 11], + [6, 10], + [7, 9], + + [17, 26], + [18, 25], + [19, 24], + [20, 23], + [21, 22], + + [36, 45], + [37, 44], + [38, 43], + [39, 42], + [40, 47], + [41, 46], + + [31, 35], + [32, 34], + + [50, 52], + [49, 53], + [48, 54], + [59, 55], + [58, 56], + [67, 65], + [60, 64], + [61, 63]] + + +def mirror_landmarks(landmarks, val): + result = landmarks.copy() + + for idx in mirror_idxs: + result[idx] = result[idx[::-1]] + + result[:, 0] = val - result[:, 0] - 1 + return result + + +def get_face_struct_mask(image_shape, image_landmarks, eyebrows_expand_mod=1.0, color=(1,)): + mask = np.zeros(image_shape[0:2] + (len(color),), dtype=np.float32) + lmrks = expand_eyebrows(image_landmarks, eyebrows_expand_mod) + draw_landmarks(mask, image_landmarks, color=color, draw_circles=False, thickness=2) + return mask + + +def draw_landmarks(image, image_landmarks, color=(0, 255, 0), draw_circles=True, thickness=1, transparent_mask=False): + if len(image_landmarks) != 68: + raise Exception('get_image_eye_mask works only with 68 landmarks') + + int_lmrks = np.array(image_landmarks, dtype=np.int) + + jaw = int_lmrks[slice(*landmarks_68_pt["jaw"])] + right_eyebrow = int_lmrks[slice(*landmarks_68_pt["right_eyebrow"])] + left_eyebrow = int_lmrks[slice(*landmarks_68_pt["left_eyebrow"])] + mouth = int_lmrks[slice(*landmarks_68_pt["mouth"])] + right_eye = int_lmrks[slice(*landmarks_68_pt["right_eye"])] + left_eye = int_lmrks[slice(*landmarks_68_pt["left_eye"])] + nose = int_lmrks[slice(*landmarks_68_pt["nose"])] + + # open shapes + cv2.polylines(image, + tuple(np.array([v]) for v in (right_eyebrow, jaw, left_eyebrow, np.concatenate((nose, [nose[-6]])))), + False, color, thickness=thickness, lineType=cv2.LINE_AA) + # closed shapes + cv2.polylines(image, tuple(np.array([v]) for v in (right_eye, left_eye, mouth)), + True, color, thickness=thickness, lineType=cv2.LINE_AA) + + if draw_circles: + # the rest of the cicles + for x, y in np.concatenate((right_eyebrow, left_eyebrow, mouth, right_eye, left_eye, nose), axis=0): + cv2.circle(image, (x, y), 1, color, 1, lineType=cv2.LINE_AA) + # jaw big circles + for x, y in jaw: + cv2.circle(image, (x, y), 2, color, lineType=cv2.LINE_AA) + + if transparent_mask: + mask = get_image_hull_mask(image.shape, image_landmarks) + image[...] 
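# Usage sketch for mirror_landmarks(): when an image is flipped horizontally
# the 68-point landmarks must be mirrored too, otherwise left/right parts
# (eyes, brows, mouth corners) swap meaning.
import cv2

def flip_sample(img, lmrks68):
    flipped_img = cv2.flip(img, 1)                         # horizontal flip
    flipped_lmrks = mirror_landmarks(lmrks68, img.shape[1])
    return flipped_img, flipped_lmrks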
= (image * (1 - mask) + image * mask / 2)[...] + + +def draw_rect_landmarks(image, rect, image_landmarks, face_type, face_size=256, transparent_mask=False, + landmarks_color=(0, 255, 0)): + draw_landmarks(image, image_landmarks, color=landmarks_color, transparent_mask=transparent_mask) + imagelib.draw_rect(image, rect, (255, 0, 0), 2) + + image_to_face_mat = get_transform_mat(image_landmarks, face_size, face_type) + points = transform_points([(0, 0), (0, face_size - 1), (face_size - 1, face_size - 1), (face_size - 1, 0)], + image_to_face_mat, True) + imagelib.draw_polygon(image, points, (0, 0, 255), 2) + + points = transform_points( + [(int(face_size * 0.05), 0), (int(face_size * 0.1), int(face_size * 0.1)), (0, int(face_size * 0.1))], + image_to_face_mat, True) + imagelib.draw_polygon(image, points, (0, 0, 255), 2) + + +def calc_face_pitch(landmarks): + if not isinstance(landmarks, np.ndarray): + landmarks = np.array(landmarks) + t = ((landmarks[6][1] - landmarks[8][1]) + (landmarks[10][1] - landmarks[8][1])) / 2.0 + b = landmarks[8][1] + return float(b - t) + + +def estimate_averaged_yaw(landmarks): + # Works much better than solvePnP if landmarks from "3DFAN" + if not isinstance(landmarks, np.ndarray): + landmarks = np.array(landmarks) + l = ((landmarks[27][0] - landmarks[0][0]) + (landmarks[28][0] - landmarks[1][0]) + ( + landmarks[29][0] - landmarks[2][0])) / 3.0 + r = ((landmarks[16][0] - landmarks[27][0]) + (landmarks[15][0] - landmarks[28][0]) + ( + landmarks[14][0] - landmarks[29][0])) / 3.0 + return float(r - l) + + +def estimate_pitch_yaw_roll(aligned_landmarks, size=256): + """ + returns pitch,yaw,roll [-pi/2...+pi/2] + """ + shape = (size, size) + focal_length = shape[1] + camera_center = (shape[1] / 2, shape[0] / 2) + camera_matrix = np.array( + [[focal_length, 0, camera_center[0]], + [0, focal_length, camera_center[1]], + [0, 0, 1]], dtype=np.float32) + + (_, rotation_vector, _) = cv2.solvePnP( + np.concatenate((landmarks_68_3D[:27], landmarks_68_3D[30:36]), axis=0), + np.concatenate((aligned_landmarks[:27], aligned_landmarks[30:36]), axis=0).astype(np.float32), + camera_matrix, + np.zeros((4, 1))) + + pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles(cv2.Rodrigues(rotation_vector)[0]) + + half_pi = math.pi / 2.0 + pitch = np.clip(pitch, -half_pi, half_pi) + yaw = np.clip(yaw, -half_pi, half_pi) + roll = np.clip(roll, -half_pi, half_pi) + + return -pitch, yaw, roll + + +# if remove_align: +# bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True) +# #import code +# #code.interact(local=dict(globals(), **locals())) +# area = mathlib.polygon_area(bbox[:,0], bbox[:,1] ) +# side = math.sqrt(area) / 2 +# center = transform_points ( [(output_size/2,output_size/2)], mat, True) +# pts1 = np.float32(( center+[-side,-side], center+[side,-side], center+[side,-side] )) +# pts2 = np.float32([[0,0],[output_size,0],[0,output_size]]) +# mat = cv2.getAffineTransform(pts1,pts2) +# if full_face_align_top and (face_type == FaceType.FULL or face_type == FaceType.FULL_NO_ALIGN): +# #lmrks2 = expand_eyebrows(image_landmarks) +# #lmrks2_ = transform_points( [ lmrks2[19], lmrks2[24] ], mat, False ) +# #y_diff = np.float32( (0,np.min(lmrks2_[:,1])) ) +# #y_diff = transform_points( [ np.float32( (0,0) ), y_diff], mat, True) +# #y_diff = y_diff[1]-y_diff[0] +# +# x_diff = np.float32((0,0)) +# +# lmrks2_ = transform_points( [ image_landmarks[0], image_landmarks[16] ], mat, False ) +# if lmrks2_[0,0] < 0: +# x_diff = lmrks2_[0,0] +# x_diff = 
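# Usage sketch for estimate_pitch_yaw_roll(): the landmarks must be expressed
# in the aligned crop's coordinate system (same `size` as the crop). Returned
# angles are radians clipped to [-pi/2, pi/2].
import numpy as np

def head_pose_degrees(aligned_lmrks68, size=256):
    pitch, yaw, roll = estimate_pitch_yaw_roll(aligned_lmrks68, size=size)
    return np.degrees([pitch, yaw, roll])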
transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True) +# x_diff = x_diff[1]-x_diff[0] +# elif lmrks2_[1,0] >= output_size: +# x_diff = lmrks2_[1,0]-(output_size-1) +# x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True) +# x_diff = x_diff[1]-x_diff[0] +# +# mat = cv2.getAffineTransform( l_t+y_diff+x_diff ,pts2) + + +""" +def get_averaged_transform_mat (img_landmarks, + img_landmarks_prev, + img_landmarks_next, + average_frame_count, + average_center_frame_count, + output_size, face_type, scale=1.0): + + l_c_list = [] + tb_diag_vec_list = [] + bt_diag_vec_list = [] + mod_list = [] + + count = max(average_frame_count,average_center_frame_count) + for i in range ( -count, count+1, 1 ): + if i < 0: + lmrks = img_landmarks_prev[i] if -i < len(img_landmarks_prev) else None + elif i > 0: + lmrks = img_landmarks_next[i] if i < len(img_landmarks_next) else None + else: + lmrks = img_landmarks + + if lmrks is None: + continue + + l_c, tb_diag_vec, bt_diag_vec, mod = get_transform_mat_data (lmrks, face_type, scale=scale) + + if i >= -average_frame_count and i <= average_frame_count: + tb_diag_vec_list.append(tb_diag_vec) + bt_diag_vec_list.append(bt_diag_vec) + mod_list.append(mod) + + if i >= -average_center_frame_count and i <= average_center_frame_count: + l_c_list.append(l_c) + + tb_diag_vec = np.mean( np.array(tb_diag_vec_list), axis=0 ) + bt_diag_vec = np.mean( np.array(bt_diag_vec_list), axis=0 ) + mod = np.mean( np.array(mod_list), axis=0 ) + l_c = np.mean( np.array(l_c_list), axis=0 ) + + return get_transform_mat_by_data (l_c, tb_diag_vec, bt_diag_vec, mod, output_size, face_type) + + +def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): + if not isinstance(image_landmarks, np.ndarray): + image_landmarks = np.array (image_landmarks) + + # get face padding value for FaceType + padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0) + + # estimate landmarks transform from global space to local aligned space with bounds [0..1] + mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2] + + # get corner points in global space + l_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True) + l_c = l_p[4] + + # calc diagonal vectors between corners in global space + tb_diag_vec = (l_p[2]-l_p[0]).astype(np.float32) + tb_diag_vec /= npla.norm(tb_diag_vec) + bt_diag_vec = (l_p[1]-l_p[3]).astype(np.float32) + bt_diag_vec /= npla.norm(bt_diag_vec) + + # calc modifier of diagonal vectors for scale and padding value + mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) ) + + # calc 3 points in global space to estimate 2d affine transform + if not remove_align: + l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ), + np.round( l_c + bt_diag_vec*mod ), + np.round( l_c + tb_diag_vec*mod ) ] ) + else: + # remove_align - face will be centered in the frame but not aligned + l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ), + np.round( l_c + bt_diag_vec*mod ), + np.round( l_c + tb_diag_vec*mod ), + np.round( l_c - bt_diag_vec*mod ), + ] ) + + # get area of face square in global space + area = mathlib.polygon_area(l_t[:,0], l_t[:,1] ) + + # calc side of square + side = np.float32(math.sqrt(area) / 2) + + # calc 3 points with unrotated square + l_t = np.array( [ np.round( l_c + [-side,-side] ), + np.round( l_c + [ side,-side] ), + np.round( l_c + [ side, side] ) ] ) + + # calc affine transform from 3 
global space points to 3 local space points size of 'output_size' + pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) )) + mat = cv2.getAffineTransform(l_t,pts2) + + return mat +""" diff --git a/face_detect/__init__.py b/face_detect/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8e58ddd298d9a9776e3a74822b57c500609155 --- /dev/null +++ b/face_detect/__init__.py @@ -0,0 +1,3 @@ +from .face_align_5_landmarks import FaceDetect5Landmarks +from .face_align_utils import estimate_norm + diff --git a/face_detect/core/imagelib/SegIEPolys.py b/face_detect/core/imagelib/SegIEPolys.py new file mode 100644 index 0000000000000000000000000000000000000000..1a4c3d29e724da56a3e151d22c2bbb95b421ce90 --- /dev/null +++ b/face_detect/core/imagelib/SegIEPolys.py @@ -0,0 +1,158 @@ +import numpy as np +import cv2 +from enum import IntEnum + + +class SegIEPolyType(IntEnum): + EXCLUDE = 0 + INCLUDE = 1 + + + +class SegIEPoly(): + def __init__(self, type=None, pts=None, **kwargs): + self.type = type + + if pts is None: + pts = np.empty( (0,2), dtype=np.float32 ) + else: + pts = np.float32(pts) + self.pts = pts + self.n_max = self.n = len(pts) + + def dump(self): + return {'type': int(self.type), + 'pts' : self.get_pts(), + } + + def identical(self, b): + if self.n != b.n: + return False + return (self.pts[0:self.n] == b.pts[0:b.n]).all() + + def get_type(self): + return self.type + + def add_pt(self, x, y): + self.pts = np.append(self.pts[0:self.n], [ ( float(x), float(y) ) ], axis=0).astype(np.float32) + self.n_max = self.n = self.n + 1 + + def undo(self): + self.n = max(0, self.n-1) + return self.n + + def redo(self): + self.n = min(len(self.pts), self.n+1) + return self.n + + def redo_clip(self): + self.pts = self.pts[0:self.n] + self.n_max = self.n + + def insert_pt(self, n, pt): + if n < 0 or n > self.n: + raise ValueError("insert_pt out of range") + self.pts = np.concatenate( (self.pts[0:n], pt[None,...].astype(np.float32), self.pts[n:]), axis=0) + self.n_max = self.n = self.n+1 + + def remove_pt(self, n): + if n < 0 or n >= self.n: + raise ValueError("remove_pt out of range") + self.pts = np.concatenate( (self.pts[0:n], self.pts[n+1:]), axis=0) + self.n_max = self.n = self.n-1 + + def get_last_point(self): + return self.pts[self.n-1].copy() + + def get_pts(self): + return self.pts[0:self.n].copy() + + def get_pts_count(self): + return self.n + + def set_point(self, id, pt): + self.pts[id] = pt + + def set_points(self, pts): + self.pts = np.array(pts) + self.n_max = self.n = len(pts) + + def mult_points(self, val): + self.pts *= val + + + +class SegIEPolys(): + def __init__(self): + self.polys = [] + + def identical(self, b): + polys_len = len(self.polys) + o_polys_len = len(b.polys) + if polys_len != o_polys_len: + return False + + return all ([ a_poly.identical(b_poly) for a_poly, b_poly in zip(self.polys, b.polys) ]) + + def add_poly(self, ie_poly_type): + poly = SegIEPoly(ie_poly_type) + self.polys.append (poly) + return poly + + def remove_poly(self, poly): + if poly in self.polys: + self.polys.remove(poly) + + def has_polys(self): + return len(self.polys) != 0 + + def get_poly(self, id): + return self.polys[id] + + def get_polys(self): + return self.polys + + def get_pts_count(self): + return sum([poly.get_pts_count() for poly in self.polys]) + + def sort(self): + poly_by_type = { SegIEPolyType.EXCLUDE : [], SegIEPolyType.INCLUDE : [] } + + for poly in self.polys: + poly_by_type[poly.type].append(poly) + + self.polys = 
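# Usage sketch for SegIEPoly/SegIEPolys: build an INCLUDE polygon point by
# point, rasterize it into a float mask, and take a plain-dict snapshot
# (overlay_mask() and dump() are defined just below).
import numpy as np

polys = SegIEPolys()
poly = polys.add_poly(SegIEPolyType.INCLUDE)
for x, y in [(10, 10), (100, 10), (100, 100), (10, 100)]:
    poly.add_pt(x, y)

mask = np.zeros((128, 128, 1), dtype=np.float32)
polys.overlay_mask(mask)     # INCLUDE polygons painted 1, EXCLUDE painted 0
state = polys.dump()         # plain-dict snapshot of all polygons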
poly_by_type[SegIEPolyType.INCLUDE] + poly_by_type[SegIEPolyType.EXCLUDE] + + def __iter__(self): + for poly in self.polys: + yield poly + + def overlay_mask(self, mask): + h,w,c = mask.shape + white = (1,)*c + black = (0,)*c + for poly in self.polys: + pts = poly.get_pts().astype(np.int32) + if len(pts) != 0: + cv2.fillPoly(mask, [pts], white if poly.type == SegIEPolyType.INCLUDE else black ) + + def dump(self): + return {'polys' : [ poly.dump() for poly in self.polys ] } + + def mult_points(self, val): + for poly in self.polys: + poly.mult_points(val) + + @staticmethod + def load(data=None): + ie_polys = SegIEPolys() + if data is not None: + if isinstance(data, list): + # Backward comp + ie_polys.polys = [ SegIEPoly(type=type, pts=pts) for (type, pts) in data ] + elif isinstance(data, dict): + ie_polys.polys = [ SegIEPoly(**poly_cfg) for poly_cfg in data['polys'] ] + + ie_polys.sort() + + return ie_polys \ No newline at end of file diff --git a/face_detect/core/imagelib/__init__.py b/face_detect/core/imagelib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..799fd5ee8640bbe96d46ff133a8f533957c61de2 --- /dev/null +++ b/face_detect/core/imagelib/__init__.py @@ -0,0 +1,32 @@ +from .estimate_sharpness import estimate_sharpness + +from .equalize_and_stack_square import equalize_and_stack_square + +# from .text import get_text_image, get_draw_text_lines + +from .draw import draw_polygon, draw_rect + +from .morph import morph_by_points + +from .warp import gen_warp_params, warp_by_params + +from .reduce_colors import reduce_colors + +from .color_transfer import color_transfer, color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer + +from .common import random_crop, normalize_channels, cut_odd_image, overlay_alpha_image + +from .SegIEPolys import * + +from .blursharpen import LinearMotionBlur, blursharpen + +from .filters import apply_random_rgb_levels, \ + apply_random_overlay_triangle, \ + apply_random_hsv_shift, \ + apply_random_sharpen, \ + apply_random_motion_blur, \ + apply_random_gaussian_blur, \ + apply_random_nearest_resize, \ + apply_random_bilinear_resize, \ + apply_random_jpeg_compress, \ + apply_random_relight diff --git a/face_detect/core/imagelib/blursharpen.py b/face_detect/core/imagelib/blursharpen.py new file mode 100644 index 0000000000000000000000000000000000000000..51745119f7066d6ba4e57dcbc435119b28b03983 --- /dev/null +++ b/face_detect/core/imagelib/blursharpen.py @@ -0,0 +1,38 @@ +import cv2 +import numpy as np + +def LinearMotionBlur(image, size, angle): + k = np.zeros((size, size), dtype=np.float32) + k[ (size-1)// 2 , :] = np.ones(size, dtype=np.float32) + k = cv2.warpAffine(k, cv2.getRotationMatrix2D( (size / 2 -0.5 , size / 2 -0.5 ) , angle, 1.0), (size, size) ) + k = k * ( 1.0 / np.sum(k) ) + return cv2.filter2D(image, -1, k) + +def blursharpen (img, sharpen_mode=0, kernel_size=3, amount=100): + if kernel_size % 2 == 0: + kernel_size += 1 + if amount > 0: + if sharpen_mode == 1: #box + kernel = np.zeros( (kernel_size, kernel_size), dtype=np.float32) + kernel[ kernel_size//2, kernel_size//2] = 1.0 + box_filter = np.ones( (kernel_size, kernel_size), dtype=np.float32) / (kernel_size**2) + kernel = kernel + (kernel - box_filter) * amount + return cv2.filter2D(img, -1, kernel) + elif sharpen_mode == 2: #gaussian + blur = cv2.GaussianBlur(img, (kernel_size, kernel_size) , 0) + img = cv2.addWeighted(img, 1.0 + (0.5 * amount), blur, -(0.5 * amount), 0) + 
return img + elif amount < 0: + n = -amount + while n > 0: + + img_blur = cv2.medianBlur(img, 5) + if int(n / 10) != 0: + img = img_blur + else: + pass_power = (n % 10) / 10.0 + img = img*(1.0-pass_power)+img_blur*pass_power + n = max(n-10,0) + + return img + return img \ No newline at end of file diff --git a/face_detect/core/imagelib/color_transfer.py b/face_detect/core/imagelib/color_transfer.py new file mode 100644 index 0000000000000000000000000000000000000000..db71ce5ef034fd1461a3f1595e67704aae620068 --- /dev/null +++ b/face_detect/core/imagelib/color_transfer.py @@ -0,0 +1,340 @@ +import cv2 +import numexpr as ne +import numpy as np +from numpy import linalg as npla +import scipy as sp + + +def color_transfer_sot(src, trg, steps=10, batch_size=5, reg_sigmaXY=16.0, reg_sigmaV=5.0): + """ + Color Transform via Sliced Optimal Transfer + ported by @iperov from https://github.com/dcoeurjo/OTColorTransfer + + src - any float range any channel image + dst - any float range any channel image, same shape as src + steps - number of solver steps + batch_size - solver batch size + reg_sigmaXY - apply regularization and sigmaXY of filter, otherwise set to 0.0 + reg_sigmaV - sigmaV of filter + + return value - clip it manually + """ + if not np.issubdtype(src.dtype, np.floating): + raise ValueError("src value must be float") + if not np.issubdtype(trg.dtype, np.floating): + raise ValueError("trg value must be float") + + if len(src.shape) != 3: + raise ValueError("src shape must have rank 3 (h,w,c)") + + if src.shape != trg.shape: + raise ValueError("src and trg shapes must be equal") + + src_dtype = src.dtype + h, w, c = src.shape + new_src = src.copy() + + advect = np.empty((h * w, c), dtype=src_dtype) + for step in range(steps): + advect.fill(0) + for batch in range(batch_size): + dir = np.random.normal(size=c).astype(src_dtype) + dir /= npla.norm(dir) + + projsource = np.sum(new_src * dir, axis=-1).reshape((h * w)) + projtarget = np.sum(trg * dir, axis=-1).reshape((h * w)) + + idSource = np.argsort(projsource) + idTarget = np.argsort(projtarget) + + a = projtarget[idTarget] - projsource[idSource] + for i_c in range(c): + advect[idSource, i_c] += a * dir[i_c] + new_src += advect.reshape((h, w, c)) / batch_size + + if reg_sigmaXY != 0.0: + src_diff = new_src - src + src_diff_filt = cv2.bilateralFilter(src_diff, 0, reg_sigmaV, reg_sigmaXY) + if len(src_diff_filt.shape) == 2: + src_diff_filt = src_diff_filt[..., None] + new_src = src + src_diff_filt + return new_src + + +def color_transfer_mkl(x0, x1): + eps = np.finfo(float).eps + + h, w, c = x0.shape + h1, w1, c1 = x1.shape + + x0 = x0.reshape((h * w, c)) + x1 = x1.reshape((h1 * w1, c1)) + + a = np.cov(x0.T) + b = np.cov(x1.T) + + Da2, Ua = np.linalg.eig(a) + Da = np.diag(np.sqrt(Da2.clip(eps, None))) + + C = np.dot(np.dot(np.dot(np.dot(Da, Ua.T), b), Ua), Da) + + Dc2, Uc = np.linalg.eig(C) + Dc = np.diag(np.sqrt(Dc2.clip(eps, None))) + + Da_inv = np.diag(1. 
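# Usage sketch for LinearMotionBlur()/blursharpen() above: positive `amount`
# sharpens with a box or gaussian kernel, negative `amount` applies repeated
# median blurs. The values below are only illustrative.
import numpy as np

img = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)   # stand-in image
sharper = blursharpen(img, sharpen_mode=1, kernel_size=3, amount=10)
softer = blursharpen(img, sharpen_mode=1, kernel_size=3, amount=-20)
smeared = LinearMotionBlur(img, size=9, angle=45)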
/ (np.diag(Da))) + + t = np.dot(np.dot(np.dot(np.dot(np.dot(np.dot(Ua, Da_inv), Uc), Dc), Uc.T), Da_inv), Ua.T) + + mx0 = np.mean(x0, axis=0) + mx1 = np.mean(x1, axis=0) + + result = np.dot(x0 - mx0, t) + mx1 + return np.clip(result.reshape((h, w, c)).astype(x0.dtype), 0, 1) + + +def color_transfer_idt(i0, i1, bins=256, n_rot=20): + import scipy.stats + + relaxation = 1 / n_rot + h, w, c = i0.shape + h1, w1, c1 = i1.shape + + i0 = i0.reshape((h * w, c)) + i1 = i1.reshape((h1 * w1, c1)) + + n_dims = c + + d0 = i0.T + d1 = i1.T + + for i in range(n_rot): + + r = sp.stats.special_ortho_group.rvs(n_dims).astype(np.float32) + + d0r = np.dot(r, d0) + d1r = np.dot(r, d1) + d_r = np.empty_like(d0) + + for j in range(n_dims): + lo = min(d0r[j].min(), d1r[j].min()) + hi = max(d0r[j].max(), d1r[j].max()) + + p0r, edges = np.histogram(d0r[j], bins=bins, range=[lo, hi]) + p1r, _ = np.histogram(d1r[j], bins=bins, range=[lo, hi]) + + cp0r = p0r.cumsum().astype(np.float32) + cp0r /= cp0r[-1] + + cp1r = p1r.cumsum().astype(np.float32) + cp1r /= cp1r[-1] + + f = np.interp(cp0r, cp1r, edges[1:]) + + d_r[j] = np.interp(d0r[j], edges[1:], f, left=0, right=bins) + + d0 = relaxation * np.linalg.solve(r, (d_r - d0r)) + d0 + + return np.clip(d0.T.reshape((h, w, c)).astype(i0.dtype), 0, 1) + + +def reinhard_color_transfer(target: np.ndarray, source: np.ndarray, target_mask: np.ndarray = None, + source_mask: np.ndarray = None, mask_cutoff=0.5) -> np.ndarray: + """ + Transfer color using rct method. + target np.ndarray H W 3C (BGR) np.float32 + source np.ndarray H W 3C (BGR) np.float32 + target_mask(None) np.ndarray H W 1C np.float32 + source_mask(None) np.ndarray H W 1C np.float32 + + mask_cutoff(0.5) float + masks are used to limit the space where color statistics will be computed to adjust the target + reference: Color Transfer between Images https://www.cs.tau.ac.il/~turkel/imagepapers/ColorTransfer.pdf + """ + source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB) + target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB) + + source_input = source + if source_mask is not None: + source_input = source_input.copy() + source_input[source_mask[..., 0] < mask_cutoff] = [0, 0, 0] + + target_input = target + if target_mask is not None: + target_input = target_input.copy() + target_input[target_mask[..., 0] < mask_cutoff] = [0, 0, 0] + + target_l_mean, target_l_std, target_a_mean, target_a_std, target_b_mean, target_b_std, \ + = target_input[..., 0].mean(), target_input[..., 0].std(), target_input[..., 1].mean(), target_input[ + ..., 1].std(), target_input[..., 2].mean(), target_input[..., 2].std() + + source_l_mean, source_l_std, source_a_mean, source_a_std, source_b_mean, source_b_std, \ + = source_input[..., 0].mean(), source_input[..., 0].std(), source_input[..., 1].mean(), source_input[ + ..., 1].std(), source_input[..., 2].mean(), source_input[..., 2].std() + + # not as in the paper: scale by the standard deviations using reciprocal of paper proposed factor + target_l = target[..., 0] + target_l = ne.evaluate('(target_l - target_l_mean) * source_l_std / target_l_std + source_l_mean') + + target_a = target[..., 1] + target_a = ne.evaluate('(target_a - target_a_mean) * source_a_std / target_a_std + source_a_mean') + + target_b = target[..., 2] + target_b = ne.evaluate('(target_b - target_b_mean) * source_b_std / target_b_std + source_b_mean') + + np.clip(target_l, 0, 100, out=target_l) + np.clip(target_a, -127, 127, out=target_a) + np.clip(target_b, -127, 127, out=target_b) + + return cv2.cvtColor(np.stack([target_l, target_a, 
target_b], -1), cv2.COLOR_LAB2BGR) + +def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5): + ''' + Matches the colour distribution of the target image to that of the source image + using a linear transform. + Images are expected to be of form (w,h,c) and float in [0,1]. + Modes are chol, pca or sym for different choices of basis. + ''' + mu_t = target_img.mean(0).mean(0) + t = target_img - mu_t + t = t.transpose(2, 0, 1).reshape(t.shape[-1], -1) + Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0]) + mu_s = source_img.mean(0).mean(0) + s = source_img - mu_s + s = s.transpose(2, 0, 1).reshape(s.shape[-1], -1) + Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0]) + if mode == 'chol': + chol_t = np.linalg.cholesky(Ct) + chol_s = np.linalg.cholesky(Cs) + ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t) + if mode == 'pca': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + eva_s, eve_s = np.linalg.eigh(Cs) + Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T) + ts = Qs.dot(np.linalg.inv(Qt)).dot(t) + if mode == 'sym': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + Qt_Cs_Qt = Qt.dot(Cs).dot(Qt) + eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt) + QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T) + ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t) + matched_img = ts.reshape(*target_img.transpose(2, 0, 1).shape).transpose(1, 2, 0) + matched_img += mu_s + matched_img[matched_img > 1] = 1 + matched_img[matched_img < 0] = 0 + return np.clip(matched_img.astype(source_img.dtype), 0, 1) + + +def lab_image_stats(image): + # compute the mean and standard deviation of each channel + (l, a, b) = cv2.split(image) + (lMean, lStd) = (l.mean(), l.std()) + (aMean, aStd) = (a.mean(), a.std()) + (bMean, bStd) = (b.mean(), b.std()) + + # return the color statistics + return (lMean, lStd, aMean, aStd, bMean, bStd) + + +def _scale_array(arr, clip=True): + if clip: + return np.clip(arr, 0, 255) + + mn = arr.min() + mx = arr.max() + scale_range = (max([mn, 0]), min([mx, 255])) + + if mn < scale_range[0] or mx > scale_range[1]: + return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0] + + return arr + + +def channel_hist_match(source, template, hist_match_threshold=255, mask=None): + # Code borrowed from: + # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x + masked_source = source + masked_template = template + + if mask is not None: + masked_source = source * mask + masked_template = template * mask + + oldshape = source.shape + source = source.ravel() + template = template.ravel() + masked_source = masked_source.ravel() + masked_template = masked_template.ravel() + s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, + return_counts=True) + t_values, t_counts = np.unique(template, return_counts=True) + + s_quantiles = np.cumsum(s_counts).astype(np.float64) + s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1] + t_quantiles = np.cumsum(t_counts).astype(np.float64) + t_quantiles = 255 * t_quantiles / t_quantiles[-1] + interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) + + return interp_t_values[bin_idx].reshape(oldshape) + + +def color_hist_match(src_im, tar_im, hist_match_threshold=255): + h, w, c = src_im.shape + matched_R = channel_hist_match(src_im[:, :, 0], tar_im[:, :, 0], hist_match_threshold, None) + matched_G = channel_hist_match(src_im[:, :, 1], 
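# Usage sketch for the statistics-based transfers above: both expect float32
# BGR images in [0, 1]; reinhard_color_transfer matches per-channel LAB
# mean/std, while linear_color_transfer matches the channel covariance.
import numpy as np

src = np.random.rand(128, 128, 3).astype(np.float32)   # image to recolor
ref = np.random.rand(128, 128, 3).astype(np.float32)   # color reference
rct_out = reinhard_color_transfer(src, ref)
lct_out = linear_color_transfer(src, ref, mode='pca')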
tar_im[:, :, 1], hist_match_threshold, None) + matched_B = channel_hist_match(src_im[:, :, 2], tar_im[:, :, 2], hist_match_threshold, None) + + to_stack = (matched_R, matched_G, matched_B) + for i in range(3, c): + to_stack += (src_im[:, :, i],) + + matched = np.stack(to_stack, axis=-1).astype(src_im.dtype) + return matched + + +def color_transfer_mix(img_src, img_trg): + img_src = np.clip(img_src * 255.0, 0, 255).astype(np.uint8) + img_trg = np.clip(img_trg * 255.0, 0, 255).astype(np.uint8) + + img_src_lab = cv2.cvtColor(img_src, cv2.COLOR_BGR2LAB) + img_trg_lab = cv2.cvtColor(img_trg, cv2.COLOR_BGR2LAB) + + rct_light = np.clip(linear_color_transfer(img_src_lab[..., 0:1].astype(np.float32) / 255.0, + img_trg_lab[..., 0:1].astype(np.float32) / 255.0)[..., 0] * 255.0, + 0, 255).astype(np.uint8) + + img_src_lab[..., 0] = (np.ones_like(rct_light) * 100).astype(np.uint8) + img_src_lab = cv2.cvtColor(img_src_lab, cv2.COLOR_LAB2BGR) + + img_trg_lab[..., 0] = (np.ones_like(rct_light) * 100).astype(np.uint8) + img_trg_lab = cv2.cvtColor(img_trg_lab, cv2.COLOR_LAB2BGR) + + img_rct = color_transfer_sot(img_src_lab.astype(np.float32), img_trg_lab.astype(np.float32)) + img_rct = np.clip(img_rct, 0, 255).astype(np.uint8) + + img_rct = cv2.cvtColor(img_rct, cv2.COLOR_BGR2LAB) + img_rct[..., 0] = rct_light + img_rct = cv2.cvtColor(img_rct, cv2.COLOR_LAB2BGR) + + return (img_rct / 255.0).astype(np.float32) + + +def color_transfer(ct_mode, img_src, img_trg): + """ + color transfer for [0,1] float32 inputs + """ + if ct_mode == 'lct': + out = linear_color_transfer(img_src, img_trg) + elif ct_mode == 'rct': + out = reinhard_color_transfer(img_src, img_trg) + elif ct_mode == 'mkl': + out = color_transfer_mkl(img_src, img_trg) + elif ct_mode == 'idt': + out = color_transfer_idt(img_src, img_trg) + elif ct_mode == 'sot': + out = color_transfer_sot(img_src, img_trg) + out = np.clip(out, 0.0, 1.0) + else: + raise ValueError(f"unknown ct_mode {ct_mode}") + return out diff --git a/face_detect/core/imagelib/common.py b/face_detect/core/imagelib/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e77e20119f162028c65b01d402404254a672f331 --- /dev/null +++ b/face_detect/core/imagelib/common.py @@ -0,0 +1,62 @@ +import numpy as np + + +def random_crop(img, w, h): + height, width = img.shape[:2] + + h_rnd = height - h + w_rnd = width - w + + y = np.random.randint(0, h_rnd) if h_rnd > 0 else 0 + x = np.random.randint(0, w_rnd) if w_rnd > 0 else 0 + + return img[y:y + height, x:x + width] + + +def normalize_channels(img, target_channels): + img_shape_len = len(img.shape) + if img_shape_len == 2: + h, w = img.shape + c = 0 + elif img_shape_len == 3: + h, w, c = img.shape + else: + raise ValueError("normalize: incorrect image dimensions.") + + if c == 0 and target_channels > 0: + img = img[..., np.newaxis] + c = 1 + + if c == 1 and target_channels > 1: + img = np.repeat(img, target_channels, -1) + c = target_channels + + if c > target_channels: + img = img[..., 0:target_channels] + c = target_channels + + return img + + +def cut_odd_image(img): + h, w, c = img.shape + wm, hm = w % 2, h % 2 + if wm + hm != 0: + img = img[0:h - hm, 0:w - wm, :] + return img + + +def overlay_alpha_image(img_target, img_source, xy_offset=(0, 0)): + (h, w, c) = img_source.shape + if c != 4: + raise ValueError("overlay_alpha_image, img_source must have 4 channels") + + x1, x2 = xy_offset[0], xy_offset[0] + w + y1, y2 = xy_offset[1], xy_offset[1] + h + + alpha_s = img_source[:, :, 3] / 255.0 + alpha_l = 1.0 - alpha_s + + 
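# Usage sketch for color_transfer() above, the single entry point used by
# callers: ct_mode picks the algorithm ('lct', 'rct', 'mkl', 'idt', 'sot') and
# inputs are float32 images in [0, 1].
import numpy as np

src = np.random.rand(64, 64, 3).astype(np.float32)
ref = np.random.rand(64, 64, 3).astype(np.float32)
for mode in ('lct', 'rct', 'sot'):
    recolored = color_transfer(mode, src, ref)   # same shape as src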
for c in range(0, 3): + img_target[y1:y2, x1:x2, c] = (alpha_s * img_source[:, :, c] + + alpha_l * img_target[y1:y2, x1:x2, c]) diff --git a/face_detect/core/imagelib/draw.py b/face_detect/core/imagelib/draw.py new file mode 100644 index 0000000000000000000000000000000000000000..3de1191735bc8135c06178afde810f50d95da077 --- /dev/null +++ b/face_detect/core/imagelib/draw.py @@ -0,0 +1,13 @@ +import numpy as np +import cv2 + +def draw_polygon (image, points, color, thickness = 1): + points_len = len(points) + for i in range (0, points_len): + p0 = tuple( points[i] ) + p1 = tuple( points[ (i+1) % points_len] ) + cv2.line (image, p0, p1, color, thickness=thickness) + +def draw_rect(image, rect, color, thickness=1): + l,t,r,b = rect + draw_polygon (image, [ (l,t), (r,t), (r,b), (l,b ) ], color, thickness) diff --git a/face_detect/core/imagelib/equalize_and_stack_square.py b/face_detect/core/imagelib/equalize_and_stack_square.py new file mode 100644 index 0000000000000000000000000000000000000000..31c435a0714c0525fa6dc6bb84a685fc8102396d --- /dev/null +++ b/face_detect/core/imagelib/equalize_and_stack_square.py @@ -0,0 +1,45 @@ +import numpy as np +import cv2 + +def equalize_and_stack_square (images, axis=1): + max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] ) + + target_wh = 99999 + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if h < target_wh: + target_wh = h + + if w < target_wh: + target_wh = w + + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if c < max_c: + if c == 1: + if len(image.shape) == 2: + image = np.expand_dims ( image, -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + elif c == 2: #GA + image = np.expand_dims ( image[...,0], -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + else: + image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 ) + + if h != target_wh or w != target_wh: + image = cv2.resize ( image, (target_wh, target_wh) ) + h,w,c = image.shape + + images[i] = image + + return np.concatenate ( images, axis = 1 ) \ No newline at end of file diff --git a/face_detect/core/imagelib/estimate_sharpness.py b/face_detect/core/imagelib/estimate_sharpness.py new file mode 100644 index 0000000000000000000000000000000000000000..e4b3e2dce92cc55cf7bccea633f548db0557d40d --- /dev/null +++ b/face_detect/core/imagelib/estimate_sharpness.py @@ -0,0 +1,278 @@ +""" +Copyright (c) 2009-2010 Arizona Board of Regents. All Rights Reserved. + Contact: Lina Karam (karam@asu.edu) and Niranjan Narvekar (nnarveka@asu.edu) + Image, Video, and Usabilty (IVU) Lab, http://ivulab.asu.edu , Arizona State University + This copyright statement may not be removed from any file containing it or from modifications to these files. + This copyright notice must also be included in any file or product that is derived from the source files. + + Redistribution and use of this code in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + - Redistribution's of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + - Redistribution's in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the distribution. 
+ - The Image, Video, and Usability Laboratory (IVU Lab, http://ivulab.asu.edu) is acknowledged in any publication that + reports research results using this code, copies of this code, or modifications of this code. + The code and our papers are to be cited in the bibliography as: + +N. D. Narvekar and L. J. Karam, "CPBD Sharpness Metric Software", http://ivulab.asu.edu/Quality/CPBD + +N. D. Narvekar and L. J. Karam, "A No-Reference Image Blur Metric Based on the Cumulative +Probability of Blur Detection (CPBD)," accepted and to appear in the IEEE Transactions on Image Processing, 2011. + +N. D. Narvekar and L. J. Karam, "An Improved No-Reference Sharpness Metric Based on the Probability of Blur Detection," International Workshop on Video Processing and Quality Metrics for Consumer Electronics (VPQM), January 2010, http://www.vpqm.org (pdf) + +N. D. Narvekar and L. J. Karam, "A No Reference Perceptual Quality Metric based on Cumulative Probability of Blur Detection," First International Workshop on the Quality of Multimedia Experience (QoMEX), pp. 87-91, July 2009. + + DISCLAIMER: + This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the Arizona Board of Regents, Arizona State University, IVU Lab members, authors or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute +goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. +""" + +import numpy as np +import cv2 +from math import atan2, pi + + +def sobel(image): + # type: (numpy.ndarray) -> numpy.ndarray + """ + Find edges using the Sobel approximation to the derivatives. + + Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l196). + """ + from skimage.filters.edges import HSOBEL_WEIGHTS + h1 = np.array(HSOBEL_WEIGHTS) + h1 /= np.sum(abs(h1)) # normalize h1 + + from scipy.ndimage import convolve + strength2 = np.square(convolve(image, h1.T)) + + # Note: https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l59 + thresh2 = 2 * np.sqrt(np.mean(strength2)) + + strength2[strength2 <= thresh2] = 0 + return _simple_thinning(strength2) + + +def _simple_thinning(strength): + # type: (numpy.ndarray) -> numpy.ndarray + """ + Perform a very simple thinning. + + Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l512). 
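+    A pixel survives only if its edge strength is strictly greater than both of
+    its horizontal neighbours or both of its vertical neighbours, i.e. a crude
+    non-maximum suppression of the Sobel response; the result is a boolean map.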
+ """ + num_rows, num_cols = strength.shape + + zero_column = np.zeros((num_rows, 1)) + zero_row = np.zeros((1, num_cols)) + + x = ( + (strength > np.c_[zero_column, strength[:, :-1]]) & + (strength > np.c_[strength[:, 1:], zero_column]) + ) + + y = ( + (strength > np.r_[zero_row, strength[:-1, :]]) & + (strength > np.r_[strength[1:, :], zero_row]) + ) + + return x | y + + + + + +# threshold to characterize blocks as edge/non-edge blocks +THRESHOLD = 0.002 +# fitting parameter +BETA = 3.6 +# block size +BLOCK_HEIGHT, BLOCK_WIDTH = (64, 64) +# just noticeable widths based on the perceptual experiments +WIDTH_JNB = np.concatenate([5*np.ones(51), 3*np.ones(205)]) + + +def compute(image): + # type: (numpy.ndarray) -> float + """Compute the sharpness metric for the given data.""" + + # convert the image to double for further processing + image = image.astype(np.float64) + + # edge detection using canny and sobel canny edge detection is done to + # classify the blocks as edge or non-edge blocks and sobel edge + # detection is done for the purpose of edge width measurement. + from skimage.feature import canny + canny_edges = canny(image) + sobel_edges = sobel(image) + + # edge width calculation + marziliano_widths = marziliano_method(sobel_edges, image) + + # sharpness metric calculation + return _calculate_sharpness_metric(image, canny_edges, marziliano_widths) + + +def marziliano_method(edges, image): + # type: (numpy.ndarray, numpy.ndarray) -> numpy.ndarray + """ + Calculate the widths of the given edges. + + :return: A matrix with the same dimensions as the given image with 0's at + non-edge locations and edge-widths at the edge locations. + """ + + # `edge_widths` consists of zero and non-zero values. A zero value + # indicates that there is no edge at that position and a non-zero value + # indicates that there is an edge at that position and the value itself + # gives the edge width. 
+ edge_widths = np.zeros(image.shape) + + # find the gradient for the image + gradient_y, gradient_x = np.gradient(image) + + # dimensions of the image + img_height, img_width = image.shape + + # holds the angle information of the edges + edge_angles = np.zeros(image.shape) + + # calculate the angle of the edges + for row in range(img_height): + for col in range(img_width): + if gradient_x[row, col] != 0: + edge_angles[row, col] = atan2(gradient_y[row, col], gradient_x[row, col]) * (180 / pi) + elif gradient_x[row, col] == 0 and gradient_y[row, col] == 0: + edge_angles[row,col] = 0 + elif gradient_x[row, col] == 0 and gradient_y[row, col] == pi/2: + edge_angles[row, col] = 90 + + + if np.any(edge_angles): + + # quantize the angle + quantized_angles = 45 * np.round(edge_angles / 45) + + for row in range(1, img_height - 1): + for col in range(1, img_width - 1): + if edges[row, col] == 1: + + # gradient angle = 180 or -180 + if quantized_angles[row, col] == 180 or quantized_angles[row, col] == -180: + for margin in range(100 + 1): + inner_border = (col - 1) - margin + outer_border = (col - 2) - margin + + # outside image or intensity increasing from left to right + if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) <= 0: + break + + width_left = margin + 1 + + for margin in range(100 + 1): + inner_border = (col + 1) + margin + outer_border = (col + 2) + margin + + # outside image or intensity increasing from left to right + if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) >= 0: + break + + width_right = margin + 1 + + edge_widths[row, col] = width_left + width_right + + + # gradient angle = 0 + if quantized_angles[row, col] == 0: + for margin in range(100 + 1): + inner_border = (col - 1) - margin + outer_border = (col - 2) - margin + + # outside image or intensity decreasing from left to right + if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) >= 0: + break + + width_left = margin + 1 + + for margin in range(100 + 1): + inner_border = (col + 1) + margin + outer_border = (col + 2) + margin + + # outside image or intensity decreasing from left to right + if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) <= 0: + break + + width_right = margin + 1 + + edge_widths[row, col] = width_right + width_left + + return edge_widths + + +def _calculate_sharpness_metric(image, edges, edge_widths): + # type: (numpy.array, numpy.array, numpy.array) -> numpy.float64 + + # get the size of image + img_height, img_width = image.shape + + total_num_edges = 0 + hist_pblur = np.zeros(101) + + # maximum block indices + num_blocks_vertically = int(img_height / BLOCK_HEIGHT) + num_blocks_horizontally = int(img_width / BLOCK_WIDTH) + + # loop over the blocks + for i in range(num_blocks_vertically): + for j in range(num_blocks_horizontally): + + # get the row and col indices for the block pixel positions + rows = slice(BLOCK_HEIGHT * i, BLOCK_HEIGHT * (i + 1)) + cols = slice(BLOCK_WIDTH * j, BLOCK_WIDTH * (j + 1)) + + if is_edge_block(edges[rows, cols], THRESHOLD): + block_widths = edge_widths[rows, cols] + # rotate block to simulate column-major boolean indexing + block_widths = np.rot90(np.flipud(block_widths), 3) + block_widths = block_widths[block_widths != 0] + + block_contrast = get_block_contrast(image[rows, cols]) + block_jnb = WIDTH_JNB[block_contrast] + + # calculate the probability of blur detection at the edges + # detected in the block + prob_blur_detection = 1 - 
np.exp(-abs(block_widths/block_jnb) ** BETA) + + # update the statistics using the block information + for probability in prob_blur_detection: + bucket = int(round(probability * 100)) + hist_pblur[bucket] += 1 + total_num_edges += 1 + + # normalize the pdf + if total_num_edges > 0: + hist_pblur = hist_pblur / total_num_edges + + # calculate the sharpness metric + return np.sum(hist_pblur[:64]) + + +def is_edge_block(block, threshold): + # type: (numpy.ndarray, float) -> bool + """Decide whether the given block is an edge block.""" + return np.count_nonzero(block) > (block.size * threshold) + + +def get_block_contrast(block): + # type: (numpy.ndarray) -> int + return int(np.max(block) - np.min(block)) + + +def estimate_sharpness(image): + if image.ndim == 3: + if image.shape[2] > 1: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + else: + image = image[...,0] + + return compute(image) diff --git a/face_detect/core/imagelib/filters.py b/face_detect/core/imagelib/filters.py new file mode 100644 index 0000000000000000000000000000000000000000..6b6957630bf478a39e0467d734a5fe1ec90ad5ed --- /dev/null +++ b/face_detect/core/imagelib/filters.py @@ -0,0 +1,245 @@ +import numpy as np +from .blursharpen import LinearMotionBlur, blursharpen +import cv2 + +def apply_random_rgb_levels(img, mask=None, rnd_state=None): + if rnd_state is None: + rnd_state = np.random + np_rnd = rnd_state.rand + + inBlack = np.array([np_rnd()*0.25 , np_rnd()*0.25 , np_rnd()*0.25], dtype=np.float32) + inWhite = np.array([1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25], dtype=np.float32) + inGamma = np.array([0.5+np_rnd(), 0.5+np_rnd(), 0.5+np_rnd()], dtype=np.float32) + + outBlack = np.array([np_rnd()*0.25 , np_rnd()*0.25 , np_rnd()*0.25], dtype=np.float32) + outWhite = np.array([1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25], dtype=np.float32) + + result = np.clip( (img - inBlack) / (inWhite - inBlack), 0, 1 ) + result = ( result ** (1/inGamma) ) * (outWhite - outBlack) + outBlack + result = np.clip(result, 0, 1) + + if mask is not None: + result = img*(1-mask) + result*mask + + return result + +def apply_random_hsv_shift(img, mask=None, rnd_state=None): + if rnd_state is None: + rnd_state = np.random + + h, s, v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) + h = ( h + rnd_state.randint(360) ) % 360 + s = np.clip ( s + rnd_state.random()-0.5, 0, 1 ) + v = np.clip ( v + rnd_state.random()-0.5, 0, 1 ) + + result = np.clip( cv2.cvtColor(cv2.merge([h, s, v]), cv2.COLOR_HSV2BGR) , 0, 1 ) + if mask is not None: + result = img*(1-mask) + result*mask + + return result + +def apply_random_sharpen( img, chance, kernel_max_size, mask=None, rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + sharp_rnd_kernel = rnd_state.randint(kernel_max_size)+1 + + result = img + if rnd_state.randint(100) < np.clip(chance, 0, 100): + if rnd_state.randint(2) == 0: + result = blursharpen(result, 1, sharp_rnd_kernel, rnd_state.randint(10) ) + else: + result = blursharpen(result, 2, sharp_rnd_kernel, rnd_state.randint(50) ) + + if mask is not None: + result = img*(1-mask) + result*mask + + return result + +def apply_random_motion_blur( img, chance, mb_max_size, mask=None, rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + mblur_rnd_kernel = rnd_state.randint(mb_max_size)+1 + mblur_rnd_deg = rnd_state.randint(360) + + result = img + if rnd_state.randint(100) < np.clip(chance, 0, 100): + result = LinearMotionBlur (result, mblur_rnd_kernel, mblur_rnd_deg ) + if mask is not None: + result = 
img*(1-mask) + result*mask + + return result + +def apply_random_gaussian_blur( img, chance, kernel_max_size, mask=None, rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + result = img + if rnd_state.randint(100) < np.clip(chance, 0, 100): + gblur_rnd_kernel = rnd_state.randint(kernel_max_size)*2+1 + result = cv2.GaussianBlur(result, (gblur_rnd_kernel,)*2 , 0) + if mask is not None: + result = img*(1-mask) + result*mask + + return result + +def apply_random_resize( img, chance, max_size_per, interpolation=cv2.INTER_LINEAR, mask=None, rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + result = img + if rnd_state.randint(100) < np.clip(chance, 0, 100): + h,w,c = result.shape + + trg = rnd_state.rand() + rw = w - int( trg * int(w*(max_size_per/100.0)) ) + rh = h - int( trg * int(h*(max_size_per/100.0)) ) + + result = cv2.resize (result, (rw,rh), interpolation=interpolation ) + result = cv2.resize (result, (w,h), interpolation=interpolation ) + if mask is not None: + result = img*(1-mask) + result*mask + + return result + +def apply_random_nearest_resize( img, chance, max_size_per, mask=None, rnd_state=None ): + return apply_random_resize( img, chance, max_size_per, interpolation=cv2.INTER_NEAREST, mask=mask, rnd_state=rnd_state ) + +def apply_random_bilinear_resize( img, chance, max_size_per, mask=None, rnd_state=None ): + return apply_random_resize( img, chance, max_size_per, interpolation=cv2.INTER_LINEAR, mask=mask, rnd_state=rnd_state ) + +def apply_random_jpeg_compress( img, chance, mask=None, rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + result = img + if rnd_state.randint(100) < np.clip(chance, 0, 100): + h,w,c = result.shape + + quality = rnd_state.randint(10,101) + + ret, result = cv2.imencode('.jpg', np.clip(img*255, 0,255).astype(np.uint8), [int(cv2.IMWRITE_JPEG_QUALITY), quality] ) + if ret == True: + result = cv2.imdecode(result, flags=cv2.IMREAD_UNCHANGED) + result = result.astype(np.float32) / 255.0 + if mask is not None: + result = img*(1-mask) + result*mask + + return result + +def apply_random_overlay_triangle( img, max_alpha, mask=None, rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + h,w,c = img.shape + pt1 = [rnd_state.randint(w), rnd_state.randint(h) ] + pt2 = [rnd_state.randint(w), rnd_state.randint(h) ] + pt3 = [rnd_state.randint(w), rnd_state.randint(h) ] + + alpha = rnd_state.uniform()*max_alpha + + tri_mask = cv2.fillPoly( np.zeros_like(img), [ np.array([pt1,pt2,pt3], np.int32) ], (alpha,)*c ) + + if rnd_state.randint(2) == 0: + result = np.clip(img+tri_mask, 0, 1) + else: + result = np.clip(img-tri_mask, 0, 1) + + if mask is not None: + result = img*(1-mask) + result*mask + + return result + +def _min_resize(x, m): + if x.shape[0] < x.shape[1]: + s0 = m + s1 = int(float(m) / float(x.shape[0]) * float(x.shape[1])) + else: + s0 = int(float(m) / float(x.shape[1]) * float(x.shape[0])) + s1 = m + new_max = min(s1, s0) + raw_max = min(x.shape[0], x.shape[1]) + return cv2.resize(x, (s1, s0), interpolation=cv2.INTER_LANCZOS4) + +def _d_resize(x, d, fac=1.0): + new_min = min(int(d[1] * fac), int(d[0] * fac)) + raw_min = min(x.shape[0], x.shape[1]) + if new_min < raw_min: + interpolation = cv2.INTER_AREA + else: + interpolation = cv2.INTER_LANCZOS4 + y = cv2.resize(x, (int(d[1] * fac), int(d[0] * fac)), interpolation=interpolation) + return y + +def _get_image_gradient(dist): + cols = cv2.filter2D(dist, cv2.CV_32F, np.array([[-1, 0, +1], [-2, 0, +2], [-1, 0, +1]])) + rows = cv2.filter2D(dist, 
cv2.CV_32F, np.array([[-1, -2, -1], [0, 0, 0], [+1, +2, +1]])) + return cols, rows + +def _generate_lighting_effects(content): + h512 = content + h256 = cv2.pyrDown(h512) + h128 = cv2.pyrDown(h256) + h64 = cv2.pyrDown(h128) + h32 = cv2.pyrDown(h64) + h16 = cv2.pyrDown(h32) + c512, r512 = _get_image_gradient(h512) + c256, r256 = _get_image_gradient(h256) + c128, r128 = _get_image_gradient(h128) + c64, r64 = _get_image_gradient(h64) + c32, r32 = _get_image_gradient(h32) + c16, r16 = _get_image_gradient(h16) + c = c16 + c = _d_resize(cv2.pyrUp(c), c32.shape) * 4.0 + c32 + c = _d_resize(cv2.pyrUp(c), c64.shape) * 4.0 + c64 + c = _d_resize(cv2.pyrUp(c), c128.shape) * 4.0 + c128 + c = _d_resize(cv2.pyrUp(c), c256.shape) * 4.0 + c256 + c = _d_resize(cv2.pyrUp(c), c512.shape) * 4.0 + c512 + r = r16 + r = _d_resize(cv2.pyrUp(r), r32.shape) * 4.0 + r32 + r = _d_resize(cv2.pyrUp(r), r64.shape) * 4.0 + r64 + r = _d_resize(cv2.pyrUp(r), r128.shape) * 4.0 + r128 + r = _d_resize(cv2.pyrUp(r), r256.shape) * 4.0 + r256 + r = _d_resize(cv2.pyrUp(r), r512.shape) * 4.0 + r512 + coarse_effect_cols = c + coarse_effect_rows = r + EPS = 1e-10 + + max_effect = np.max((coarse_effect_cols**2 + coarse_effect_rows**2)**0.5, axis=0, keepdims=True, ).max(1, keepdims=True) + coarse_effect_cols = (coarse_effect_cols + EPS) / (max_effect + EPS) + coarse_effect_rows = (coarse_effect_rows + EPS) / (max_effect + EPS) + + return np.stack([ np.zeros_like(coarse_effect_rows), coarse_effect_rows, coarse_effect_cols], axis=-1) + +def apply_random_relight(img, mask=None, rnd_state=None): + if rnd_state is None: + rnd_state = np.random + + def_img = img + + if rnd_state.randint(2) == 0: + light_pos_y = 1.0 if rnd_state.randint(2) == 0 else -1.0 + light_pos_x = rnd_state.uniform()*2-1.0 + else: + light_pos_y = rnd_state.uniform()*2-1.0 + light_pos_x = 1.0 if rnd_state.randint(2) == 0 else -1.0 + + light_source_height = 0.3*rnd_state.uniform()*0.7 + light_intensity = 1.0+rnd_state.uniform() + ambient_intensity = 0.5 + + light_source_location = np.array([[[light_source_height, light_pos_y, light_pos_x ]]], dtype=np.float32) + light_source_direction = light_source_location / np.sqrt(np.sum(np.square(light_source_location))) + + lighting_effect = _generate_lighting_effects(img) + lighting_effect = np.sum(lighting_effect * light_source_direction, axis=-1).clip(0, 1) + lighting_effect = np.mean(lighting_effect, axis=-1, keepdims=True) + + result = def_img * (ambient_intensity + lighting_effect * light_intensity) #light_source_color + result = np.clip(result, 0, 1) + + if mask is not None: + result = def_img*(1-mask) + result*mask + + return result \ No newline at end of file diff --git a/face_detect/core/imagelib/morph.py b/face_detect/core/imagelib/morph.py new file mode 100644 index 0000000000000000000000000000000000000000..8aa5114c006deaf2c7a2825ab6f5b951569e19e7 --- /dev/null +++ b/face_detect/core/imagelib/morph.py @@ -0,0 +1,37 @@ +import numpy as np +import cv2 +from scipy.spatial import Delaunay + + +def applyAffineTransform(src, srcTri, dstTri, size) : + warpMat = cv2.getAffineTransform( np.float32(srcTri), np.float32(dstTri) ) + return cv2.warpAffine( src, warpMat, (size[0], size[1]), None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101 ) + +def morphTriangle(dst_img, src_img, st, dt) : + (h,w,c) = dst_img.shape + sr = np.array( cv2.boundingRect(np.float32(st)) ) + dr = np.array( cv2.boundingRect(np.float32(dt)) ) + sRect = st - sr[0:2] + dRect = dt - dr[0:2] + d_mask = np.zeros((dr[3], dr[2], c), dtype = np.float32) + 
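+    # Piecewise-affine warp of one Delaunay triangle: rasterise the destination
+    # triangle into d_mask, affine-warp the source triangle's bounding box onto
+    # the destination box, then blend the warped patch into dst_img with the
+    # triangle mask.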
cv2.fillConvexPoly(d_mask, np.int32(dRect), (1.0,)*c, 8, 0); + imgRect = src_img[sr[1]:sr[1] + sr[3], sr[0]:sr[0] + sr[2]] + size = (dr[2], dr[3]) + warpImage1 = applyAffineTransform(imgRect, sRect, dRect, size) + + if c == 1: + warpImage1 = np.expand_dims( warpImage1, -1 ) + + dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]] = dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]]*(1-d_mask) + warpImage1 * d_mask + +def morph_by_points (image, sp, dp): + if sp.shape != dp.shape: + raise ValueError ('morph_by_points() sp.shape != dp.shape') + (h,w,c) = image.shape + + result_image = np.zeros(image.shape, dtype = image.dtype) + + for tri in Delaunay(dp).simplices: + morphTriangle(result_image, image, sp[tri], dp[tri]) + + return result_image \ No newline at end of file diff --git a/face_detect/core/imagelib/reduce_colors.py b/face_detect/core/imagelib/reduce_colors.py new file mode 100644 index 0000000000000000000000000000000000000000..961f00ddf07886227154034b97fccfabf08a205d --- /dev/null +++ b/face_detect/core/imagelib/reduce_colors.py @@ -0,0 +1,14 @@ +import numpy as np +import cv2 +from PIL import Image + +#n_colors = [0..256] +def reduce_colors (img_bgr, n_colors): + img_rgb = (img_bgr[...,::-1] * 255.0).astype(np.uint8) + img_rgb_pil = Image.fromarray(img_rgb) + img_rgb_pil_p = img_rgb_pil.convert('P', palette=Image.ADAPTIVE, colors=n_colors) + + img_rgb_p = img_rgb_pil_p.convert('RGB') + img_bgr = cv2.cvtColor( np.array(img_rgb_p, dtype=np.float32) / 255.0, cv2.COLOR_RGB2BGR ) + + return img_bgr diff --git a/face_detect/core/imagelib/sd/__init__.py b/face_detect/core/imagelib/sd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1cddc19473acb301104519b5eacf01f7f9afa42b --- /dev/null +++ b/face_detect/core/imagelib/sd/__init__.py @@ -0,0 +1,2 @@ +from .draw import circle_faded, random_circle_faded, bezier, random_bezier_split_faded, random_faded +from .calc import * \ No newline at end of file diff --git a/face_detect/core/imagelib/sd/calc.py b/face_detect/core/imagelib/sd/calc.py new file mode 100644 index 0000000000000000000000000000000000000000..2304e6645f8ab1522906c1f436f880370ee1e40a --- /dev/null +++ b/face_detect/core/imagelib/sd/calc.py @@ -0,0 +1,25 @@ +import numpy as np +import numpy.linalg as npla + +def dist_to_edges(pts, pt, is_closed=False): + """ + returns array of dist from pt to edge and projection pt to edges + """ + if is_closed: + a = pts + b = np.concatenate( (pts[1:,:], pts[0:1,:]), axis=0 ) + else: + a = pts[:-1,:] + b = pts[1:,:] + + pa = pt-a + ba = b-a + + div = np.einsum('ij,ij->i', ba, ba) + div[div==0]=1 + h = np.clip( np.einsum('ij,ij->i', pa, ba) / div, 0, 1 ) + + x = npla.norm ( pa - ba*h[...,None], axis=1 ) + + return x, a+ba*h[...,None] + diff --git a/face_detect/core/imagelib/sd/draw.py b/face_detect/core/imagelib/sd/draw.py new file mode 100644 index 0000000000000000000000000000000000000000..711ad33ad1b6bae31fe66b1adcb7a8e808c446e4 --- /dev/null +++ b/face_detect/core/imagelib/sd/draw.py @@ -0,0 +1,200 @@ +""" +Signed distance drawing functions using numpy. 
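+Provides faded circles and signed-distance quadratic Bezier curves, which
+random_circle_faded, random_bezier_split_faded and random_faded combine into
+soft random masks in the [0..1] float32 range.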
+""" +import math + +import numpy as np +from numpy import linalg as npla + + +def vector2_dot(a,b): + return a[...,0]*b[...,0]+a[...,1]*b[...,1] + +def vector2_dot2(a): + return a[...,0]*a[...,0]+a[...,1]*a[...,1] + +def vector2_cross(a,b): + return a[...,0]*b[...,1]-a[...,1]*b[...,0] + + +def circle_faded( wh, center, fade_dists ): + """ + returns drawn circle in [h,w,1] output range [0..1.0] float32 + + wh = [w,h] resolution + center = [x,y] center of circle + fade_dists = [fade_start, fade_end] fade values + """ + w,h = wh + + pts = np.empty( (h,w,2), dtype=np.float32 ) + pts[...,0] = np.arange(w)[:,None] + pts[...,1] = np.arange(h)[None,:] + + pts = pts.reshape ( (h*w, -1) ) + + pts_dists = np.abs ( npla.norm(pts-center, axis=-1) ) + + if fade_dists[1] == 0: + fade_dists[1] = 1 + + pts_dists = ( pts_dists - fade_dists[0] ) / fade_dists[1] + + pts_dists = np.clip( 1-pts_dists, 0, 1) + + return pts_dists.reshape ( (h,w,1) ).astype(np.float32) + + +def bezier( wh, A, B, C ): + """ + returns drawn bezier in [h,w,1] output range float32, + every pixel contains signed distance to bezier line + + wh [w,h] resolution + A,B,C points [x,y] + """ + + width,height = wh + + A = np.float32(A) + B = np.float32(B) + C = np.float32(C) + + + pos = np.empty( (height,width,2), dtype=np.float32 ) + pos[...,0] = np.arange(width)[:,None] + pos[...,1] = np.arange(height)[None,:] + + + a = B-A + b = A - 2.0*B + C + c = a * 2.0 + d = A - pos + + b_dot = vector2_dot(b,b) + if b_dot == 0.0: + return np.zeros( (height,width), dtype=np.float32 ) + + kk = 1.0 / b_dot + + kx = kk * vector2_dot(a,b) + ky = kk * (2.0*vector2_dot(a,a)+vector2_dot(d,b))/3.0; + kz = kk * vector2_dot(d,a); + + res = 0.0; + sgn = 0.0; + + p = ky - kx*kx; + + p3 = p*p*p; + q = kx*(2.0*kx*kx - 3.0*ky) + kz; + h = q*q + 4.0*p3; + + hp_sel = h >= 0.0 + + hp_p = h[hp_sel] + hp_p = np.sqrt(hp_p) + + hp_x = ( np.stack( (hp_p,-hp_p), -1) -q[hp_sel,None] ) / 2.0 + hp_uv = np.sign(hp_x) * np.power( np.abs(hp_x), [1.0/3.0, 1.0/3.0] ) + hp_t = np.clip( hp_uv[...,0] + hp_uv[...,1] - kx, 0.0, 1.0 ) + + hp_t = hp_t[...,None] + hp_q = d[hp_sel]+(c+b*hp_t)*hp_t + hp_res = vector2_dot2(hp_q) + hp_sgn = vector2_cross(c+2.0*b*hp_t,hp_q) + + hl_sel = h < 0.0 + + hl_q = q[hl_sel] + hl_p = p[hl_sel] + hl_z = np.sqrt(-hl_p) + hl_v = np.arccos( hl_q / (hl_p*hl_z*2.0)) / 3.0 + + hl_m = np.cos(hl_v) + hl_n = np.sin(hl_v)*1.732050808; + + hl_t = np.clip( np.stack( (hl_m+hl_m,-hl_n-hl_m,hl_n-hl_m), -1)*hl_z[...,None]-kx, 0.0, 1.0 ); + + hl_d = d[hl_sel] + + hl_qx = hl_d+(c+b*hl_t[...,0:1])*hl_t[...,0:1] + + hl_dx = vector2_dot2(hl_qx) + hl_sx = vector2_cross(c+2.0*b*hl_t[...,0:1], hl_qx) + + hl_qy = hl_d+(c+b*hl_t[...,1:2])*hl_t[...,1:2] + hl_dy = vector2_dot2(hl_qy) + hl_sy = vector2_cross(c+2.0*b*hl_t[...,1:2],hl_qy); + + hl_dx_l_dy = hl_dx=hl_dy + + hl_res = np.empty_like(hl_dx) + hl_res[hl_dx_l_dy] = hl_dx[hl_dx_l_dy] + hl_res[hl_dx_ge_dy] = hl_dy[hl_dx_ge_dy] + + hl_sgn = np.empty_like(hl_sx) + hl_sgn[hl_dx_l_dy] = hl_sx[hl_dx_l_dy] + hl_sgn[hl_dx_ge_dy] = hl_sy[hl_dx_ge_dy] + + res = np.empty( (height, width), np.float32 ) + res[hp_sel] = hp_res + res[hl_sel] = hl_res + + sgn = np.empty( (height, width), np.float32 ) + sgn[hp_sel] = hp_sgn + sgn[hl_sel] = hl_sgn + + sgn = np.sign(sgn) + res = np.sqrt(res)*sgn + + return res[...,None] + +def random_faded(wh): + """ + apply one of them: + random_circle_faded + random_bezier_split_faded + """ + rnd = np.random.randint(2) + if rnd == 0: + return random_circle_faded(wh) + elif rnd == 1: + return 
random_bezier_split_faded(wh) + +def random_circle_faded ( wh, rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + w,h = wh + wh_max = max(w,h) + fade_start = rnd_state.randint(wh_max) + fade_end = fade_start + rnd_state.randint(wh_max- fade_start) + + return circle_faded (wh, [ rnd_state.randint(h), rnd_state.randint(w) ], + [fade_start, fade_end] ) + +def random_bezier_split_faded( wh ): + width, height = wh + + degA = np.random.randint(360) + degB = np.random.randint(360) + degC = np.random.randint(360) + + deg_2_rad = math.pi / 180.0 + + center = np.float32([width / 2.0, height / 2.0]) + + radius = max(width, height) + + A = center + radius*np.float32([ math.sin( degA * deg_2_rad), math.cos( degA * deg_2_rad) ] ) + B = center + np.random.randint(radius)*np.float32([ math.sin( degB * deg_2_rad), math.cos( degB * deg_2_rad) ] ) + C = center + radius*np.float32([ math.sin( degC * deg_2_rad), math.cos( degC * deg_2_rad) ] ) + + x = bezier( (width,height), A, B, C ) + + x = x / (1+np.random.randint(radius)) + 0.5 + + x = np.clip(x, 0, 1) + return x diff --git a/face_detect/core/imagelib/warp.py b/face_detect/core/imagelib/warp.py new file mode 100644 index 0000000000000000000000000000000000000000..bcfb6c56de982b829bc2ee5a7b16e1f7000f9e68 --- /dev/null +++ b/face_detect/core/imagelib/warp.py @@ -0,0 +1,72 @@ +import numpy as np +import cv2 +from face_detect.core import randomex + +def gen_warp_params (w, flip=False, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05], rnd_state=None ): + if rnd_state is None: + rnd_state = np.random + + rw = None + if w < 64: + rw = w + w = 64 + + rotation = rnd_state.uniform( rotation_range[0], rotation_range[1] ) + scale = rnd_state.uniform(1 +scale_range[0], 1 +scale_range[1]) + tx = rnd_state.uniform( tx_range[0], tx_range[1] ) + ty = rnd_state.uniform( ty_range[0], ty_range[1] ) + p_flip = flip and rnd_state.randint(10) < 4 + + #random warp by grid + cell_size = [ w // (2**i) for i in range(1,4) ] [ rnd_state.randint(3) ] + cell_count = w // cell_size + 1 + + grid_points = np.linspace( 0, w, cell_count) + mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() + mapy = mapx.T + + mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + randomex.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + randomex.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + + half_cell_size = cell_size // 2 + + mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size,half_cell_size:-half_cell_size].astype(np.float32) + mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size,half_cell_size:-half_cell_size].astype(np.float32) + + #random transform + random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale) + random_transform_mat[:, 2] += (tx*w, ty*w) + + params = dict() + params['mapx'] = mapx + params['mapy'] = mapy + params['rmat'] = random_transform_mat + u_mat = random_transform_mat.copy() + u_mat[:,2] /= w + params['umat'] = u_mat + params['w'] = w + params['rw'] = rw + params['flip'] = p_flip + + return params + +def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate, cv2_inter=cv2.INTER_CUBIC): + rw = params['rw'] + + if (can_warp or can_transform) and rw is not None: + img = cv2.resize(img, (64,64), interpolation=cv2_inter) + + if can_warp: + img = cv2.remap(img, params['mapx'], params['mapy'], cv2_inter ) + if can_transform: + img = cv2.warpAffine( img, 
params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2_inter ) + + + if (can_warp or can_transform) and rw is not None: + img = cv2.resize(img, (rw,rw), interpolation=cv2_inter) + + if len(img.shape) == 2: + img = img[...,None] + if can_flip and params['flip']: + img = img[:,::-1,...] + return img \ No newline at end of file diff --git a/face_detect/core/leras/__init__.py b/face_detect/core/leras/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7d9fb2b0ea9e79230e9e7b00fe4cc28654149340 --- /dev/null +++ b/face_detect/core/leras/__init__.py @@ -0,0 +1 @@ +from .nn import nn \ No newline at end of file diff --git a/face_detect/core/leras/archis/ArchiBase.py b/face_detect/core/leras/archis/ArchiBase.py new file mode 100644 index 0000000000000000000000000000000000000000..92f265b367fbfbac57ec2a8c0aa43852c2eadb69 --- /dev/null +++ b/face_detect/core/leras/archis/ArchiBase.py @@ -0,0 +1,17 @@ +from face_feature.core.leras import nn + +class ArchiBase(): + + def __init__(self, *args, name=None, **kwargs): + self.name=name + + + #overridable + def flow(self, *args, **kwargs): + raise Exception("this archi does not support flow. Use model classes directly.") + + #overridable + def get_weights(self): + pass + +nn.ArchiBase = ArchiBase \ No newline at end of file diff --git a/face_detect/core/leras/archis/DeepFakeArchi.py b/face_detect/core/leras/archis/DeepFakeArchi.py new file mode 100644 index 0000000000000000000000000000000000000000..acbd39abdc0069013caee6b6c8e95d2adc770a09 --- /dev/null +++ b/face_detect/core/leras/archis/DeepFakeArchi.py @@ -0,0 +1,223 @@ +from face_feature.core.leras import nn + +tf = nn.tf + + +class DeepFakeArchi(nn.ArchiBase): + """ + resolution + + mod None - default + 'quick' + """ + + def __init__(self, resolution, mod=None, opts=None): + super().__init__() + + if opts is None: + opts = '' + + if mod is None: + class Downscale(nn.ModelBase): + def __init__(self, in_ch, out_ch, kernel_size=5, *kwargs): + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + super().__init__(*kwargs) + + def on_build(self, *args, **kwargs): + self.conv1 = nn.Conv2D(self.in_ch, self.out_ch, kernel_size=self.kernel_size, strides=2, + padding='SAME') + + def forward(self, x): + x = self.conv1(x) + x = tf.nn.leaky_relu(x, 0.1) + return x + + def get_out_ch(self): + return self.out_ch + + class DownscaleBlock(nn.ModelBase): + def on_build(self, in_ch, ch, n_downscales, kernel_size): + self.downs = [] + + last_ch = in_ch + for i in range(n_downscales): + cur_ch = ch * (min(2 ** i, 8)) + self.downs.append(Downscale(last_ch, cur_ch, kernel_size=kernel_size)) + last_ch = self.downs[-1].get_out_ch() + + def forward(self, inp): + x = inp + for down in self.downs: + x = down(x) + return x + + class Upscale(nn.ModelBase): + def on_build(self, in_ch, out_ch, kernel_size=3): + self.conv1 = nn.Conv2D(in_ch, out_ch * 4, kernel_size=kernel_size, padding='SAME') + + def forward(self, x): + x = self.conv1(x) + x = tf.nn.leaky_relu(x, 0.1) + x = nn.depth_to_space(x, 2) + return x + + class ResidualBlock(nn.ModelBase): + def on_build(self, ch, kernel_size=3): + self.conv1 = nn.Conv2D(ch, ch, kernel_size=kernel_size, padding='SAME') + self.conv2 = nn.Conv2D(ch, ch, kernel_size=kernel_size, padding='SAME') + + def forward(self, inp): + x = self.conv1(inp) + x = tf.nn.leaky_relu(x, 0.2) + x = self.conv2(x) + x = tf.nn.leaky_relu(inp + x, 0.2) + return x + + class 
Encoder(nn.ModelBase): + def __init__(self, in_ch, e_ch, **kwargs): + self.in_ch = in_ch + self.e_ch = e_ch + super().__init__(**kwargs) + + def on_build(self): + self.down1 = DownscaleBlock(self.in_ch, self.e_ch, n_downscales=4, kernel_size=5) + + def forward(self, inp): + return nn.flatten(self.down1(inp)) + + def get_out_res(self, res): + return res // (2 ** 4) + + def get_out_ch(self): + return self.e_ch * 8 + + lowest_dense_res = resolution // (32 if 'd' in opts else 16) + + class Inter(nn.ModelBase): + def __init__(self, in_ch, ae_ch, ae_out_ch, **kwargs): + self.in_ch, self.ae_ch, self.ae_out_ch = in_ch, ae_ch, ae_out_ch + super().__init__(**kwargs) + + def on_build(self): + in_ch, ae_ch, ae_out_ch = self.in_ch, self.ae_ch, self.ae_out_ch + if 'u' in opts: + self.dense_norm = nn.DenseNorm() + + self.dense1 = nn.Dense(in_ch, ae_ch) + self.dense2 = nn.Dense(ae_ch, lowest_dense_res * lowest_dense_res * ae_out_ch) + self.upscale1 = Upscale(ae_out_ch, ae_out_ch) + + def forward(self, inp): + x = inp + if 'u' in opts: + x = self.dense_norm(x) + x = self.dense1(x) + x = self.dense2(x) + x = nn.reshape_4D(x, lowest_dense_res, lowest_dense_res, self.ae_out_ch) + x = self.upscale1(x) + return x + + def get_out_res(self): + return lowest_dense_res * 2 + + def get_out_ch(self): + return self.ae_out_ch + + class Decoder(nn.ModelBase): + def on_build(self, in_ch, d_ch, d_mask_ch): + self.upscale0 = Upscale(in_ch, d_ch * 8, kernel_size=3) + self.upscale1 = Upscale(d_ch * 8, d_ch * 4, kernel_size=3) + self.upscale2 = Upscale(d_ch * 4, d_ch * 2, kernel_size=3) + + self.res0 = ResidualBlock(d_ch * 8, kernel_size=3) + self.res1 = ResidualBlock(d_ch * 4, kernel_size=3) + self.res2 = ResidualBlock(d_ch * 2, kernel_size=3) + + self.out_conv = nn.Conv2D(d_ch * 2, 3, kernel_size=1, padding='SAME') + + # self.upscalem0 = Upscale(in_ch, d_mask_ch * 8, kernel_size=3) + # self.upscalem1 = Upscale(d_mask_ch * 8, d_mask_ch * 4, kernel_size=3) + # self.upscalem2 = Upscale(d_mask_ch * 4, d_mask_ch * 2, kernel_size=3) + # self.out_convm = nn.Conv2D(d_mask_ch * 2, 1, kernel_size=1, padding='SAME') + + if 'd' in opts: + self.out_conv1 = nn.Conv2D(d_ch * 2, 3, kernel_size=3, padding='SAME') + self.out_conv2 = nn.Conv2D(d_ch * 2, 3, kernel_size=3, padding='SAME') + self.out_conv3 = nn.Conv2D(d_ch * 2, 3, kernel_size=3, padding='SAME') + # self.upscalem3 = Upscale(d_mask_ch * 2, d_mask_ch * 1, kernel_size=3) + # self.out_convm = nn.Conv2D(d_mask_ch * 1, 1, kernel_size=1, padding='SAME') + else: + # self.out_convm = nn.Conv2D(d_mask_ch * 2, 1, kernel_size=1, padding='SAME') + pass + + def forward(self, inp): + z = inp + + x = self.upscale0(z) + x = self.res0(x) + x = self.upscale1(x) + x = self.res1(x) + x = self.upscale2(x) + x = self.res2(x) + + if 'd' in opts: + x0 = tf.nn.sigmoid(self.out_conv(x)) + x0 = nn.upsample2d(x0) + x1 = tf.nn.sigmoid(self.out_conv1(x)) + x1 = nn.upsample2d(x1) + x2 = tf.nn.sigmoid(self.out_conv2(x)) + x2 = nn.upsample2d(x2) + x3 = tf.nn.sigmoid(self.out_conv3(x)) + x3 = nn.upsample2d(x3) + + if nn.data_format == "NHWC": + tile_cfg = (1, resolution // 2, resolution // 2, 1) + else: + tile_cfg = (1, 1, resolution // 2, resolution // 2) + + z0 = tf.concat( + (tf.concat((tf.ones((1, 1, 1, 1)), tf.zeros((1, 1, 1, 1))), axis=nn.conv2d_spatial_axes[1]), + tf.concat((tf.zeros((1, 1, 1, 1)), tf.zeros((1, 1, 1, 1))), + axis=nn.conv2d_spatial_axes[1])), axis=nn.conv2d_spatial_axes[0]) + + z0 = tf.tile(z0, tile_cfg) + + z1 = tf.concat( + (tf.concat((tf.zeros((1, 1, 1, 1)), tf.ones((1, 1, 1, 1))), 
axis=nn.conv2d_spatial_axes[1]), + tf.concat((tf.zeros((1, 1, 1, 1)), tf.zeros((1, 1, 1, 1))), + axis=nn.conv2d_spatial_axes[1])), axis=nn.conv2d_spatial_axes[0]) + z1 = tf.tile(z1, tile_cfg) + + z2 = tf.concat((tf.concat((tf.zeros((1, 1, 1, 1)), tf.zeros((1, 1, 1, 1))), + axis=nn.conv2d_spatial_axes[1]), + tf.concat((tf.ones((1, 1, 1, 1)), tf.zeros((1, 1, 1, 1))), + axis=nn.conv2d_spatial_axes[1])), axis=nn.conv2d_spatial_axes[0]) + z2 = tf.tile(z2, tile_cfg) + + z3 = tf.concat((tf.concat((tf.zeros((1, 1, 1, 1)), tf.zeros((1, 1, 1, 1))), + axis=nn.conv2d_spatial_axes[1]), + tf.concat((tf.zeros((1, 1, 1, 1)), tf.ones((1, 1, 1, 1))), + axis=nn.conv2d_spatial_axes[1])), axis=nn.conv2d_spatial_axes[0]) + z3 = tf.tile(z3, tile_cfg) + + x = x0 * z0 + x1 * z1 + x2 * z2 + x3 * z3 + else: + x = tf.nn.sigmoid(self.out_conv(x)) + + # m = self.upscalem0(z) + # m = self.upscalem1(m) + # m = self.upscalem2(m) + # if 'd' in opts: + # m = self.upscalem3(m) + # m = tf.nn.sigmoid(self.out_convm(m)) + + return x + + self.Encoder = Encoder + self.Inter = Inter + self.Decoder = Decoder + + +nn.DeepFakeArchi = DeepFakeArchi diff --git a/face_detect/core/leras/archis/__init__.py b/face_detect/core/leras/archis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3734ddd68904c89e0f494017094c2c1cb0447b42 --- /dev/null +++ b/face_detect/core/leras/archis/__init__.py @@ -0,0 +1,2 @@ +from .ArchiBase import * +from .DeepFakeArchi import * \ No newline at end of file diff --git a/face_detect/core/leras/device.py b/face_detect/core/leras/device.py new file mode 100644 index 0000000000000000000000000000000000000000..03a2a0ef412dc7f60e19d72bcb383312b773f4c3 --- /dev/null +++ b/face_detect/core/leras/device.py @@ -0,0 +1,272 @@ +import sys +import ctypes +import os +import multiprocessing +import json +import time +from pathlib import Path +# from face_feature.core.interact import interact as io + + +class Device(object): + def __init__(self, index, tf_dev_type, name, total_mem, free_mem): + self.index = index + self.tf_dev_type = tf_dev_type + self.name = name + + self.total_mem = total_mem + self.total_mem_gb = total_mem / 1024**3 + self.free_mem = free_mem + self.free_mem_gb = free_mem / 1024**3 + + def __str__(self): + return f"[{self.index}]:[{self.name}][{self.free_mem_gb:.3}/{self.total_mem_gb :.3}]" + +class Devices(object): + all_devices = None + + def __init__(self, devices): + self.devices = devices + + def __len__(self): + return len(self.devices) + + def __getitem__(self, key): + result = self.devices[key] + if isinstance(key, slice): + return Devices(result) + return result + + def __iter__(self): + for device in self.devices: + yield device + + def get_best_device(self): + result = None + idx_mem = 0 + for device in self.devices: + mem = device.total_mem + if mem > idx_mem: + result = device + idx_mem = mem + return result + + def get_worst_device(self): + result = None + idx_mem = sys.maxsize + for device in self.devices: + mem = device.total_mem + if mem < idx_mem: + result = device + idx_mem = mem + return result + + def get_device_by_index(self, idx): + for device in self.devices: + if device.index == idx: + return device + return None + + def get_devices_from_index_list(self, idx_list): + result = [] + for device in self.devices: + if device.index in idx_list: + result += [device] + return Devices(result) + + def get_equal_devices(self, device): + device_name = device.name + result = [] + for device in self.devices: + if device.name == device_name: + result.append (device) 
+ return Devices(result) + + def get_devices_at_least_mem(self, totalmemsize_gb): + result = [] + for device in self.devices: + if device.total_mem >= totalmemsize_gb*(1024**3): + result.append (device) + return Devices(result) + + @staticmethod + def _get_tf_devices_proc(q : multiprocessing.Queue): + + if sys.platform[0:3] == 'win': + compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_ALL') + os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path) + if not compute_cache_path.exists(): + # io.log_info("Caching GPU kernels...") + compute_cache_path.mkdir(parents=True, exist_ok=True) + + import tensorflow + + tf_version = tensorflow.version.VERSION + #if tf_version is None: + # tf_version = tensorflow.version.GIT_VERSION + if tf_version[0] == 'v': + tf_version = tf_version[1:] + if tf_version[0] == '2': + tf = tensorflow.compat.v1 + else: + tf = tensorflow + + import logging + # Disable tensorflow warnings + tf_logger = logging.getLogger('tensorflow') + tf_logger.setLevel(logging.ERROR) + + from tensorflow.python.client import device_lib + + devices = [] + + physical_devices = device_lib.list_local_devices() + physical_devices_f = {} + for dev in physical_devices: + dev_type = dev.device_type + dev_tf_name = dev.name + dev_tf_name = dev_tf_name[ dev_tf_name.index(dev_type) : ] + + dev_idx = int(dev_tf_name.split(':')[-1]) + + if dev_type in ['GPU','DML']: + dev_name = dev_tf_name + + dev_desc = dev.physical_device_desc + if len(dev_desc) != 0: + if dev_desc[0] == '{': + dev_desc_json = json.loads(dev_desc) + dev_desc_json_name = dev_desc_json.get('name',None) + if dev_desc_json_name is not None: + dev_name = dev_desc_json_name + else: + for param, value in ( v.split(':') for v in dev_desc.split(',') ): + param = param.strip() + value = value.strip() + if param == 'name': + dev_name = value + break + + physical_devices_f[dev_idx] = (dev_type, dev_name, dev.memory_limit) + + q.put(physical_devices_f) + time.sleep(0.1) + + + @staticmethod + def initialize_main_env(): + if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 0: + return + + if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): + os.environ.pop('CUDA_VISIBLE_DEVICES') + + os.environ['CUDA_​CACHE_​MAXSIZE'] = '2147483647' + os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only + + q = multiprocessing.Queue() + p = multiprocessing.Process(target=Devices._get_tf_devices_proc, args=(q,), daemon=True) + p.start() + p.join() + + visible_devices = q.get() + + os.environ['NN_DEVICES_INITIALIZED'] = '1' + os.environ['NN_DEVICES_COUNT'] = str(len(visible_devices)) + + for i in visible_devices: + dev_type, name, total_mem = visible_devices[i] + + os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'] = dev_type + os.environ[f'NN_DEVICE_{i}_NAME'] = name + os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(total_mem) + os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(total_mem) + + + + @staticmethod + def getDevices(): + if Devices.all_devices is None: + if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1: + raise Exception("nn devices are not initialized. 
Run initialize_main_env() in main process.") + devices = [] + for i in range ( int(os.environ['NN_DEVICES_COUNT']) ): + devices.append ( Device(index=i, + tf_dev_type=os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'], + name=os.environ[f'NN_DEVICE_{i}_NAME'], + total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']), + free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), ) + ) + Devices.all_devices = Devices(devices) + + return Devices.all_devices + +""" + + + # {'name' : name.split(b'\0', 1)[0].decode(), + # 'total_mem' : totalMem.value + # } + + + + + + return + + + + + min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35)) + libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll') + for libname in libnames: + try: + cuda = ctypes.CDLL(libname) + except: + continue + else: + break + else: + return Devices([]) + + nGpus = ctypes.c_int() + name = b' ' * 200 + cc_major = ctypes.c_int() + cc_minor = ctypes.c_int() + freeMem = ctypes.c_size_t() + totalMem = ctypes.c_size_t() + + result = ctypes.c_int() + device = ctypes.c_int() + context = ctypes.c_void_p() + error_str = ctypes.c_char_p() + + devices = [] + + if cuda.cuInit(0) == 0 and \ + cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0: + for i in range(nGpus.value): + if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \ + cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \ + cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0: + continue + + if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0: + if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0: + cc = cc_major.value * 10 + cc_minor.value + if cc >= min_cc: + devices.append ( {'name' : name.split(b'\0', 1)[0].decode(), + 'total_mem' : totalMem.value, + 'free_mem' : freeMem.value, + 'cc' : cc + }) + cuda.cuCtxDetach(context) + + os.environ['NN_DEVICES_COUNT'] = str(len(devices)) + for i, device in enumerate(devices): + os.environ[f'NN_DEVICE_{i}_NAME'] = device['name'] + os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem']) + os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem']) + os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc']) +""" \ No newline at end of file diff --git a/face_detect/core/leras/layers/AdaIN.py b/face_detect/core/leras/layers/AdaIN.py new file mode 100644 index 0000000000000000000000000000000000000000..11cafda56cdcce4fc8ab00e2f814aed10b222a10 --- /dev/null +++ b/face_detect/core/leras/layers/AdaIN.py @@ -0,0 +1,56 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class AdaIN(nn.LayerBase): + """ + """ + def __init__(self, in_ch, mlp_ch, kernel_initializer=None, dtype=None, **kwargs): + self.in_ch = in_ch + self.mlp_ch = mlp_ch + self.kernel_initializer = kernel_initializer + + if dtype is None: + dtype = nn.floatx + self.dtype = dtype + + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if kernel_initializer is None: + kernel_initializer = tf.initializers.he_normal() + + self.weight1 = tf.get_variable("weight1", (self.mlp_ch, self.in_ch), dtype=self.dtype, initializer=kernel_initializer) + self.bias1 = tf.get_variable("bias1", (self.in_ch,), dtype=self.dtype, initializer=tf.initializers.zeros()) + self.weight2 = tf.get_variable("weight2", (self.mlp_ch, self.in_ch), dtype=self.dtype, initializer=kernel_initializer) + self.bias2 = tf.get_variable("bias2", (self.in_ch,), dtype=self.dtype, initializer=tf.initializers.zeros()) + + def get_weights(self): + return [self.weight1, self.bias1, self.weight2, 
self.bias2] + + def forward(self, inputs): + x, mlp = inputs + + gamma = tf.matmul(mlp, self.weight1) + gamma = tf.add(gamma, tf.reshape(self.bias1, (1,self.in_ch) ) ) + + beta = tf.matmul(mlp, self.weight2) + beta = tf.add(beta, tf.reshape(self.bias2, (1,self.in_ch) ) ) + + + if nn.data_format == "NHWC": + shape = (-1,1,1,self.in_ch) + else: + shape = (-1,self.in_ch,1,1) + + x_mean = tf.reduce_mean(x, axis=nn.conv2d_spatial_axes, keepdims=True ) + x_std = tf.math.reduce_std(x, axis=nn.conv2d_spatial_axes, keepdims=True ) + 1e-5 + + x = (x - x_mean) / x_std + x *= tf.reshape(gamma, shape) + + x += tf.reshape(beta, shape) + + return x + +nn.AdaIN = AdaIN \ No newline at end of file diff --git a/face_detect/core/leras/layers/BatchNorm2D.py b/face_detect/core/leras/layers/BatchNorm2D.py new file mode 100644 index 0000000000000000000000000000000000000000..cb50139c7db73f4a077f02ef866092c286fa1923 --- /dev/null +++ b/face_detect/core/leras/layers/BatchNorm2D.py @@ -0,0 +1,42 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class BatchNorm2D(nn.LayerBase): + """ + currently not for training + """ + def __init__(self, dim, eps=1e-05, momentum=0.1, dtype=None, **kwargs): + self.dim = dim + self.eps = eps + self.momentum = momentum + if dtype is None: + dtype = nn.floatx + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + self.weight = tf.get_variable("weight", (self.dim,), dtype=self.dtype, initializer=tf.initializers.ones() ) + self.bias = tf.get_variable("bias", (self.dim,), dtype=self.dtype, initializer=tf.initializers.zeros() ) + self.running_mean = tf.get_variable("running_mean", (self.dim,), dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False ) + self.running_var = tf.get_variable("running_var", (self.dim,), dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False ) + + def get_weights(self): + return [self.weight, self.bias, self.running_mean, self.running_var] + + def forward(self, x): + if nn.data_format == "NHWC": + shape = (1,1,1,self.dim) + else: + shape = (1,self.dim,1,1) + + weight = tf.reshape ( self.weight , shape ) + bias = tf.reshape ( self.bias , shape ) + running_mean = tf.reshape ( self.running_mean, shape ) + running_var = tf.reshape ( self.running_var , shape ) + + x = (x - running_mean) / tf.sqrt( running_var + self.eps ) + x *= weight + x += bias + return x + +nn.BatchNorm2D = BatchNorm2D \ No newline at end of file diff --git a/face_detect/core/leras/layers/BlurPool.py b/face_detect/core/leras/layers/BlurPool.py new file mode 100644 index 0000000000000000000000000000000000000000..2c499407c886ede80f4b18b31560057462de534a --- /dev/null +++ b/face_detect/core/leras/layers/BlurPool.py @@ -0,0 +1,50 @@ +import numpy as np +from face_feature.core.leras import nn +tf = nn.tf + +class BlurPool(nn.LayerBase): + def __init__(self, filt_size=3, stride=2, **kwargs ): + + if nn.data_format == "NHWC": + self.strides = [1,stride,stride,1] + else: + self.strides = [1,1,stride,stride] + + self.filt_size = filt_size + pad = [ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ] + + if nn.data_format == "NHWC": + self.padding = [ [0,0], pad, pad, [0,0] ] + else: + self.padding = [ [0,0], [0,0], pad, pad ] + + if(self.filt_size==1): + a = np.array([1.,]) + elif(self.filt_size==2): + a = np.array([1., 1.]) + elif(self.filt_size==3): + a = np.array([1., 2., 1.]) + elif(self.filt_size==4): + a = np.array([1., 3., 3., 1.]) + elif(self.filt_size==5): + a = np.array([1., 4., 6., 4., 1.]) + elif(self.filt_size==6): + a = 
np.array([1., 5., 10., 10., 5., 1.]) + elif(self.filt_size==7): + a = np.array([1., 6., 15., 20., 15., 6., 1.]) + + a = a[:,None]*a[None,:] + a = a / np.sum(a) + a = a[:,:,None,None] + self.a = a + super().__init__(**kwargs) + + def build_weights(self): + self.k = tf.constant (self.a, dtype=nn.floatx ) + + def forward(self, x): + k = tf.tile (self.k, (1,1,x.shape[nn.conv2d_ch_axis],1) ) + x = tf.pad(x, self.padding ) + x = tf.nn.depthwise_conv2d(x, k, self.strides, 'VALID', data_format=nn.data_format) + return x +nn.BlurPool = BlurPool \ No newline at end of file diff --git a/face_detect/core/leras/layers/Conv2D.py b/face_detect/core/leras/layers/Conv2D.py new file mode 100644 index 0000000000000000000000000000000000000000..5308267916929fdf1668c64a9df8e2f9d072f1e8 --- /dev/null +++ b/face_detect/core/leras/layers/Conv2D.py @@ -0,0 +1,112 @@ +import numpy as np +from face_feature.core.leras import nn +tf = nn.tf + +class Conv2D(nn.LayerBase): + """ + default kernel_initializer - CA + use_wscale bool enables equalized learning rate, if kernel_initializer is None, it will be forced to random_normal + + + """ + def __init__(self, in_ch, out_ch, kernel_size, strides=1, padding='SAME', dilations=1, use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + if not isinstance(strides, int): + raise ValueError ("strides must be an int type") + if not isinstance(dilations, int): + raise ValueError ("dilations must be an int type") + kernel_size = int(kernel_size) + + if dtype is None: + dtype = nn.floatx + + if isinstance(padding, str): + if padding == "SAME": + padding = ( (kernel_size - 1) * dilations + 1 ) // 2 + elif padding == "VALID": + padding = 0 + else: + raise ValueError ("Wrong padding type. 
Should be VALID SAME or INT or 4x INTs") + + if isinstance(padding, int): + if padding != 0: + if nn.data_format == "NHWC": + padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ] + else: + padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ] + else: + padding = None + + if nn.data_format == "NHWC": + strides = [1,strides,strides,1] + else: + strides = [1,1,strides,strides] + + if nn.data_format == "NHWC": + dilations = [1,dilations,dilations,1] + else: + dilations = [1,1,dilations,dilations] + + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.strides = strides + self.padding = padding + self.dilations = dilations + self.use_bias = use_bias + self.use_wscale = use_wscale + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if self.use_wscale: + gain = 1.0 if self.kernel_size == 1 else np.sqrt(2) + fan_in = self.kernel_size*self.kernel_size*self.in_ch + he_std = gain / np.sqrt(fan_in) + self.wscale = tf.constant(he_std, dtype=self.dtype ) + if kernel_initializer is None: + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + + if kernel_initializer is None: + kernel_initializer = nn.initializers.ca() + + self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.in_ch,self.out_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + + self.bias = tf.get_variable("bias", (self.out_ch,), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def forward(self, x): + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + if self.padding is not None: + x = tf.pad (x, self.padding, mode='CONSTANT') + + x = tf.nn.conv2d(x, weight, self.strides, 'VALID', dilations=self.dilations, data_format=nn.data_format) + if self.use_bias: + if nn.data_format == "NHWC": + bias = tf.reshape (self.bias, (1,1,1,self.out_ch) ) + else: + bias = tf.reshape (self.bias, (1,self.out_ch,1,1) ) + x = tf.add(x, bias) + return x + + def __str__(self): + r = f"{self.__class__.__name__} : in_ch:{self.in_ch} out_ch:{self.out_ch} " + + return r +nn.Conv2D = Conv2D \ No newline at end of file diff --git a/face_detect/core/leras/layers/Conv2DTranspose.py b/face_detect/core/leras/layers/Conv2DTranspose.py new file mode 100644 index 0000000000000000000000000000000000000000..744da34536fd01852b4fd9af683f240f45fca6b0 --- /dev/null +++ b/face_detect/core/leras/layers/Conv2DTranspose.py @@ -0,0 +1,107 @@ +import numpy as np +from face_feature.core.leras import nn +tf = nn.tf + +class Conv2DTranspose(nn.LayerBase): + """ + use_wscale enables weight scale (equalized learning rate) + if kernel_initializer is None, it will be forced to random_normal + """ + def __init__(self, in_ch, out_ch, kernel_size, strides=2, padding='SAME', use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + if not isinstance(strides, int): + raise ValueError ("strides must be an int type") + kernel_size = int(kernel_size) + + if dtype is None: + dtype = nn.floatx + 
+ self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.strides = strides + self.padding = padding + self.use_bias = use_bias + self.use_wscale = use_wscale + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if self.use_wscale: + gain = 1.0 if self.kernel_size == 1 else np.sqrt(2) + fan_in = self.kernel_size*self.kernel_size*self.in_ch + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + if kernel_initializer is None: + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + + if kernel_initializer is None: + kernel_initializer = nn.initializers.ca() + self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.out_ch,self.in_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + + self.bias = tf.get_variable("bias", (self.out_ch,), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def forward(self, x): + shape = x.shape + + if nn.data_format == "NHWC": + h,w,c = shape[1], shape[2], shape[3] + output_shape = tf.stack ( (tf.shape(x)[0], + self.deconv_length(w, self.strides, self.kernel_size, self.padding), + self.deconv_length(h, self.strides, self.kernel_size, self.padding), + self.out_ch) ) + + strides = [1,self.strides,self.strides,1] + else: + c,h,w = shape[1], shape[2], shape[3] + output_shape = tf.stack ( (tf.shape(x)[0], + self.out_ch, + self.deconv_length(w, self.strides, self.kernel_size, self.padding), + self.deconv_length(h, self.strides, self.kernel_size, self.padding), + ) ) + strides = [1,1,self.strides,self.strides] + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + x = tf.nn.conv2d_transpose(x, weight, output_shape, strides, padding=self.padding, data_format=nn.data_format) + + if self.use_bias: + if nn.data_format == "NHWC": + bias = tf.reshape (self.bias, (1,1,1,self.out_ch) ) + else: + bias = tf.reshape (self.bias, (1,self.out_ch,1,1) ) + x = tf.add(x, bias) + return x + + def __str__(self): + r = f"{self.__class__.__name__} : in_ch:{self.in_ch} out_ch:{self.out_ch} " + + return r + + def deconv_length(self, dim_size, stride_size, kernel_size, padding): + assert padding in {'SAME', 'VALID', 'FULL'} + if dim_size is None: + return None + if padding == 'VALID': + dim_size = dim_size * stride_size + max(kernel_size - stride_size, 0) + elif padding == 'FULL': + dim_size = dim_size * stride_size - (stride_size + kernel_size - 2) + elif padding == 'SAME': + dim_size = dim_size * stride_size + return dim_size +nn.Conv2DTranspose = Conv2DTranspose \ No newline at end of file diff --git a/face_detect/core/leras/layers/Dense.py b/face_detect/core/leras/layers/Dense.py new file mode 100644 index 0000000000000000000000000000000000000000..d51e6b8e45b6d64632a2756d154318d7837f225b --- /dev/null +++ b/face_detect/core/leras/layers/Dense.py @@ -0,0 +1,76 @@ +import numpy as np +from face_feature.core.leras import nn +tf = nn.tf + +class Dense(nn.LayerBase): + def __init__(self, in_ch, out_ch, use_bias=True, use_wscale=False, 
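# Minimal sketch of the output-size rule that deconv_length() in Conv2DTranspose above
# implements, with assumed input size 32, stride 2, kernel 3.
def deconv_length(dim_size, stride, kernel, padding):
    if padding == 'VALID':
        return dim_size * stride + max(kernel - stride, 0)
    if padding == 'FULL':
        return dim_size * stride - (stride + kernel - 2)
    return dim_size * stride  # 'SAME'

print(deconv_length(32, 2, 3, 'SAME'), deconv_length(32, 2, 3, 'VALID'))  # 64 65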
maxout_ch=0, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + """ + use_wscale enables weight scale (equalized learning rate) + if kernel_initializer is None, it will be forced to random_normal + + maxout_ch https://link.springer.com/article/10.1186/s40537-019-0233-0 + typical 2-4 if you want to enable DenseMaxout behaviour + """ + self.in_ch = in_ch + self.out_ch = out_ch + self.use_bias = use_bias + self.use_wscale = use_wscale + self.maxout_ch = maxout_ch + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + if dtype is None: + dtype = nn.floatx + + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + if self.maxout_ch > 1: + weight_shape = (self.in_ch,self.out_ch*self.maxout_ch) + else: + weight_shape = (self.in_ch,self.out_ch) + + kernel_initializer = self.kernel_initializer + + if self.use_wscale: + gain = 1.0 + fan_in = np.prod( weight_shape[:-1] ) + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + if kernel_initializer is None: + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + + if kernel_initializer is None: + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + + self.weight = tf.get_variable("weight", weight_shape, dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + self.bias = tf.get_variable("bias", (self.out_ch,), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def forward(self, x): + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + x = tf.matmul(x, weight) + + if self.maxout_ch > 1: + x = tf.reshape (x, (-1, self.out_ch, self.maxout_ch) ) + x = tf.reduce_max(x, axis=-1) + + if self.use_bias: + x = tf.add(x, tf.reshape(self.bias, (1,self.out_ch) ) ) + + return x +nn.Dense = Dense \ No newline at end of file diff --git a/face_detect/core/leras/layers/DenseNorm.py b/face_detect/core/leras/layers/DenseNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..e76b14850651b34d455ac4d2a4202261eacb2dec --- /dev/null +++ b/face_detect/core/leras/layers/DenseNorm.py @@ -0,0 +1,16 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class DenseNorm(nn.LayerBase): + def __init__(self, dense=False, eps=1e-06, dtype=None, **kwargs): + self.dense = dense + if dtype is None: + dtype = nn.floatx + self.eps = tf.constant(eps, dtype=dtype, name="epsilon") + + super().__init__(**kwargs) + + def __call__(self, x): + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=-1, keepdims=True) + self.eps) + +nn.DenseNorm = DenseNorm \ No newline at end of file diff --git a/face_detect/core/leras/layers/DepthwiseConv2D.py b/face_detect/core/leras/layers/DepthwiseConv2D.py new file mode 100644 index 0000000000000000000000000000000000000000..21401c1334e7e23f55926df22cf3ea66b2118daa --- /dev/null +++ b/face_detect/core/leras/layers/DepthwiseConv2D.py @@ -0,0 +1,110 @@ +import numpy as np +from face_feature.core.leras import nn +tf = nn.tf + +class DepthwiseConv2D(nn.LayerBase): + """ + default kernel_initializer - CA + use_wscale bool enables equalized learning rate, if kernel_initializer is None, 
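# Numpy sketch of the DenseMaxout behaviour above: the layer produces out_ch * maxout_ch
# pre-activations and keeps the max of each group (sizes below are assumed).
import numpy as np

batch, out_ch, maxout_ch = 2, 4, 3
pre = np.random.randn(batch, out_ch * maxout_ch)            # matmul output before maxout
out = pre.reshape(batch, out_ch, maxout_ch).max(axis=-1)    # same reduction as forward()
print(out.shape)                                            # (2, 4)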
it will be forced to random_normal + """ + def __init__(self, in_ch, kernel_size, strides=1, padding='SAME', depth_multiplier=1, dilations=1, use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + if not isinstance(strides, int): + raise ValueError ("strides must be an int type") + if not isinstance(dilations, int): + raise ValueError ("dilations must be an int type") + kernel_size = int(kernel_size) + + if dtype is None: + dtype = nn.floatx + + if isinstance(padding, str): + if padding == "SAME": + padding = ( (kernel_size - 1) * dilations + 1 ) // 2 + elif padding == "VALID": + padding = 0 + else: + raise ValueError ("Wrong padding type. Should be VALID SAME or INT or 4x INTs") + + if isinstance(padding, int): + if padding != 0: + if nn.data_format == "NHWC": + padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ] + else: + padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ] + else: + padding = None + + if nn.data_format == "NHWC": + strides = [1,strides,strides,1] + else: + strides = [1,1,strides,strides] + + if nn.data_format == "NHWC": + dilations = [1,dilations,dilations,1] + else: + dilations = [1,1,dilations,dilations] + + self.in_ch = in_ch + self.depth_multiplier = depth_multiplier + self.kernel_size = kernel_size + self.strides = strides + self.padding = padding + self.dilations = dilations + self.use_bias = use_bias + self.use_wscale = use_wscale + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if self.use_wscale: + gain = 1.0 if self.kernel_size == 1 else np.sqrt(2) + fan_in = self.kernel_size*self.kernel_size*self.in_ch + he_std = gain / np.sqrt(fan_in) + self.wscale = tf.constant(he_std, dtype=self.dtype ) + if kernel_initializer is None: + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + + if kernel_initializer is None: + kernel_initializer = nn.initializers.ca() + + self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.in_ch,self.depth_multiplier), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + + self.bias = tf.get_variable("bias", (self.in_ch*self.depth_multiplier,), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def forward(self, x): + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + if self.padding is not None: + x = tf.pad (x, self.padding, mode='CONSTANT') + + x = tf.nn.depthwise_conv2d(x, weight, self.strides, 'VALID', data_format=nn.data_format) + if self.use_bias: + if nn.data_format == "NHWC": + bias = tf.reshape (self.bias, (1,1,1,self.in_ch*self.depth_multiplier) ) + else: + bias = tf.reshape (self.bias, (1,self.in_ch*self.depth_multiplier,1,1) ) + x = tf.add(x, bias) + return x + + def __str__(self): + r = f"{self.__class__.__name__} : in_ch:{self.in_ch} depth_multiplier:{self.depth_multiplier} " + return r + +nn.DepthwiseConv2D = DepthwiseConv2D \ No newline at end of file diff --git a/face_detect/core/leras/layers/FRNorm2D.py 
b/face_detect/core/leras/layers/FRNorm2D.py new file mode 100644 index 0000000000000000000000000000000000000000..244fa755bbb101af6c7ef4e7c1d2200c51418fe6 --- /dev/null +++ b/face_detect/core/leras/layers/FRNorm2D.py @@ -0,0 +1,38 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class FRNorm2D(nn.LayerBase): + """ + Tensorflow implementation of + Filter Response Normalization Layer: Eliminating Batch Dependence in theTraining of Deep Neural Networks + https://arxiv.org/pdf/1911.09737.pdf + """ + def __init__(self, in_ch, dtype=None, **kwargs): + self.in_ch = in_ch + + if dtype is None: + dtype = nn.floatx + self.dtype = dtype + + super().__init__(**kwargs) + + def build_weights(self): + self.weight = tf.get_variable("weight", (self.in_ch,), dtype=self.dtype, initializer=tf.initializers.ones() ) + self.bias = tf.get_variable("bias", (self.in_ch,), dtype=self.dtype, initializer=tf.initializers.zeros() ) + self.eps = tf.get_variable("eps", (1,), dtype=self.dtype, initializer=tf.initializers.constant(1e-6) ) + + def get_weights(self): + return [self.weight, self.bias, self.eps] + + def forward(self, x): + if nn.data_format == "NHWC": + shape = (1,1,1,self.in_ch) + else: + shape = (1,self.in_ch,1,1) + weight = tf.reshape ( self.weight, shape ) + bias = tf.reshape ( self.bias , shape ) + nu2 = tf.reduce_mean(tf.square(x), axis=nn.conv2d_spatial_axes, keepdims=True) + x = x * ( 1.0/tf.sqrt(nu2 + tf.abs(self.eps) ) ) + + return x*weight + bias +nn.FRNorm2D = FRNorm2D \ No newline at end of file diff --git a/face_detect/core/leras/layers/InstanceNorm2D.py b/face_detect/core/leras/layers/InstanceNorm2D.py new file mode 100644 index 0000000000000000000000000000000000000000..eaa5f3a02122ef733f71aa5d433b0b0f6082269d --- /dev/null +++ b/face_detect/core/leras/layers/InstanceNorm2D.py @@ -0,0 +1,40 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class InstanceNorm2D(nn.LayerBase): + def __init__(self, in_ch, dtype=None, **kwargs): + self.in_ch = in_ch + + if dtype is None: + dtype = nn.floatx + self.dtype = dtype + + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + self.weight = tf.get_variable("weight", (self.in_ch,), dtype=self.dtype, initializer=kernel_initializer ) + self.bias = tf.get_variable("bias", (self.in_ch,), dtype=self.dtype, initializer=tf.initializers.zeros() ) + + def get_weights(self): + return [self.weight, self.bias] + + def forward(self, x): + if nn.data_format == "NHWC": + shape = (1,1,1,self.in_ch) + else: + shape = (1,self.in_ch,1,1) + + weight = tf.reshape ( self.weight , shape ) + bias = tf.reshape ( self.bias , shape ) + + x_mean = tf.reduce_mean(x, axis=nn.conv2d_spatial_axes, keepdims=True ) + x_std = tf.math.reduce_std(x, axis=nn.conv2d_spatial_axes, keepdims=True ) + 1e-5 + + x = (x - x_mean) / x_std + x *= weight + x += bias + + return x + +nn.InstanceNorm2D = InstanceNorm2D \ No newline at end of file diff --git a/face_detect/core/leras/layers/LayerBase.py b/face_detect/core/leras/layers/LayerBase.py new file mode 100644 index 0000000000000000000000000000000000000000..157126eb4ea2b8ac7b80446430e81e82b13c5127 --- /dev/null +++ b/face_detect/core/leras/layers/LayerBase.py @@ -0,0 +1,16 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class LayerBase(nn.Saveable): + #override + def build_weights(self): + pass + + #override + def forward(self, *args, **kwargs): + pass + + def __call__(self, *args, **kwargs): + return self.forward(*args, **kwargs) + +nn.LayerBase = 
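# Numpy sketch of the FRNorm2D forward pass above for NHWC input (assumed shapes and
# freshly initialized weight=1, bias=0, eps=1e-6).
import numpy as np

x = np.random.randn(1, 8, 8, 16).astype(np.float32)
nu2 = np.mean(np.square(x), axis=(1, 2), keepdims=True)     # per-channel mean of x^2
y = x / np.sqrt(nu2 + abs(1e-6)) * 1.0 + 0.0                # weight=1, bias=0
print(y.shape)                                              # (1, 8, 8, 16)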
LayerBase \ No newline at end of file diff --git a/face_detect/core/leras/layers/Saveable.py b/face_detect/core/leras/layers/Saveable.py new file mode 100644 index 0000000000000000000000000000000000000000..cafbd4eab6b468d659fc69ade8c8cfc73944dd75 --- /dev/null +++ b/face_detect/core/leras/layers/Saveable.py @@ -0,0 +1,106 @@ +import pickle +from pathlib import Path +from face_feature.core import pathex +import numpy as np + +from face_feature.core.leras import nn + +tf = nn.tf + +class Saveable(): + def __init__(self, name=None): + self.name = name + + #override + def get_weights(self): + #return tf tensors that should be initialized/loaded/saved + return [] + + #override + def get_weights_np(self): + weights = self.get_weights() + if len(weights) == 0: + return [] + return nn.tf_sess.run (weights) + + def set_weights(self, new_weights): + weights = self.get_weights() + if len(weights) != len(new_weights): + raise ValueError ('len of lists mismatch') + + tuples = [] + for w, new_w in zip(weights, new_weights): + + if len(w.shape) != new_w.shape: + new_w = new_w.reshape(w.shape) + + tuples.append ( (w, new_w) ) + + nn.batch_set_value (tuples) + + def save_weights(self, filename, force_dtype=None): + d = {} + weights = self.get_weights() + + if self.name is None: + raise Exception("name must be defined.") + + name = self.name + for w, w_val in zip(weights, nn.tf_sess.run (weights)): + w_name_split = w.name.split('/', 1) + if name != w_name_split[0]: + raise Exception("weight first name != Saveable.name") + + if force_dtype is not None: + w_val = w_val.astype(force_dtype) + + d[ w_name_split[1] ] = w_val + + d_dumped = pickle.dumps (d, 4) + pathex.write_bytes_safe ( Path(filename), d_dumped ) + + def load_weights(self, filename): + """ + returns True if file exists + """ + filepath = Path(filename) + if filepath.exists(): + result = True + d_dumped = filepath.read_bytes() + d = pickle.loads(d_dumped) + else: + return False + + weights = self.get_weights() + + if self.name is None: + raise Exception("name must be defined.") + + try: + tuples = [] + for w in weights: + w_name_split = w.name.split('/') + if self.name != w_name_split[0]: + raise Exception("weight first name != Saveable.name") + + sub_w_name = "/".join(w_name_split[1:]) + + w_val = d.get(sub_w_name, None) + + if w_val is None: + #io.log_err(f"Weight {w.name} was not loaded from file {filename}") + tuples.append ( (w, w.initializer) ) + else: + w_val = np.reshape( w_val, w.shape.as_list() ) + tuples.append ( (w, w_val) ) + + nn.batch_set_value(tuples) + except: + return False + + return True + + def init_weights(self): + nn.init_weights(self.get_weights()) + +nn.Saveable = Saveable diff --git a/face_detect/core/leras/layers/ScaleAdd.py b/face_detect/core/leras/layers/ScaleAdd.py new file mode 100644 index 0000000000000000000000000000000000000000..29f2063f5b61421585c8c4f4b8fc855f43c8b888 --- /dev/null +++ b/face_detect/core/leras/layers/ScaleAdd.py @@ -0,0 +1,31 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class ScaleAdd(nn.LayerBase): + def __init__(self, ch, dtype=None, **kwargs): + if dtype is None: + dtype = nn.floatx + self.dtype = dtype + self.ch = ch + + super().__init__(**kwargs) + + def build_weights(self): + self.weight = tf.get_variable("weight",(self.ch,), dtype=self.dtype, initializer=tf.initializers.zeros() ) + + def get_weights(self): + return [self.weight] + + def forward(self, inputs): + if nn.data_format == "NHWC": + shape = (1,1,1,self.ch) + else: + shape = (1,self.ch,1,1) + + weight = tf.reshape ( 
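# Sketch of the on-disk format that save_weights() in Saveable above produces: a pickle
# (protocol 4) of a dict mapping each variable name, minus the leading "<Saveable.name>/"
# scope, to its numpy value. Names and shapes below are assumed.
import pickle
import numpy as np

d = {"conv1/weight:0": np.zeros((3, 3, 3, 16), np.float32),
     "conv1/bias:0": np.zeros((16,), np.float32)}
blob = pickle.dumps(d, 4)
print(sorted(pickle.loads(blob).keys()))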
self.weight, shape ) + + x0, x1 = inputs + x = x0 + x1*weight + + return x +nn.ScaleAdd = ScaleAdd \ No newline at end of file diff --git a/face_detect/core/leras/layers/TLU.py b/face_detect/core/leras/layers/TLU.py new file mode 100644 index 0000000000000000000000000000000000000000..444e3caa2cf4366863dfb8e1e56355390dc98640 --- /dev/null +++ b/face_detect/core/leras/layers/TLU.py @@ -0,0 +1,33 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class TLU(nn.LayerBase): + """ + Tensorflow implementation of + Filter Response Normalization Layer: Eliminating Batch Dependence in theTraining of Deep Neural Networks + https://arxiv.org/pdf/1911.09737.pdf + """ + def __init__(self, in_ch, dtype=None, **kwargs): + self.in_ch = in_ch + + if dtype is None: + dtype = nn.floatx + self.dtype = dtype + + super().__init__(**kwargs) + + def build_weights(self): + self.tau = tf.get_variable("tau", (self.in_ch,), dtype=self.dtype, initializer=tf.initializers.zeros() ) + + def get_weights(self): + return [self.tau] + + def forward(self, x): + if nn.data_format == "NHWC": + shape = (1,1,1,self.in_ch) + else: + shape = (1,self.in_ch,1,1) + + tau = tf.reshape ( self.tau, shape ) + return tf.math.maximum(x, tau) +nn.TLU = TLU \ No newline at end of file diff --git a/face_detect/core/leras/layers/__init__.py b/face_detect/core/leras/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1c81963fee6d120ee41bca6cf5711828598a6774 --- /dev/null +++ b/face_detect/core/leras/layers/__init__.py @@ -0,0 +1,16 @@ +from .Saveable import * +from .LayerBase import * + +from .Conv2D import * +from .Conv2DTranspose import * +from .DepthwiseConv2D import * +from .Dense import * +from .BlurPool import * + +from .BatchNorm2D import * +from .FRNorm2D import * + +from .TLU import * +from .ScaleAdd import * +from .DenseNorm import * +from .AdaIN import * \ No newline at end of file diff --git a/face_detect/core/leras/models/CodeDiscriminator.py b/face_detect/core/leras/models/CodeDiscriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..506bb54b6393b77b091ca12da5ff58c33755dac9 --- /dev/null +++ b/face_detect/core/leras/models/CodeDiscriminator.py @@ -0,0 +1,22 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class CodeDiscriminator(nn.ModelBase): + def on_build(self, in_ch, code_res, ch=256, conv_kernel_initializer=None): + n_downscales = 1 + code_res // 8 + + self.convs = [] + prev_ch = in_ch + for i in range(n_downscales): + cur_ch = ch * min( (2**i), 8 ) + self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=4 if i == 0 else 3, strides=2, padding='SAME', kernel_initializer=conv_kernel_initializer) ) + prev_ch = cur_ch + + self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + for conv in self.convs: + x = tf.nn.leaky_relu( conv(x), 0.1 ) + return self.out_conv(x) + +nn.CodeDiscriminator = CodeDiscriminator \ No newline at end of file diff --git a/face_detect/core/leras/models/ModelBase.py b/face_detect/core/leras/models/ModelBase.py new file mode 100644 index 0000000000000000000000000000000000000000..f620eda0eb97e7def548d1308385a7b86fafcce9 --- /dev/null +++ b/face_detect/core/leras/models/ModelBase.py @@ -0,0 +1,245 @@ +import types +import numpy as np +# from face_feature.core.interact import interact as io +from face_feature.core.leras import nn +from face_feature.core.leras.layers.Saveable import Saveable +tf = nn.tf + +class ModelBase(Saveable): + def 
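# Numpy sketch of the TLU layer above: an elementwise max against a learned per-channel
# threshold tau, broadcast over the spatial axes (assumed NHWC shapes; tau=0 behaves
# like ReLU).
import numpy as np

x = np.random.randn(1, 4, 4, 8).astype(np.float32)
tau = np.zeros((1, 1, 1, 8), np.float32)
y = np.maximum(x, tau)
print(y.shape)                                              # (1, 4, 4, 8)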
__init__(self, *args, name=None, **kwargs): + super().__init__(name=name) + self.layers = [] + self.layers_by_name = {} + self.built = False + self.args = args + self.kwargs = kwargs + self.run_placeholders = None + + def _build_sub(self, layer, name): + if isinstance (layer, list): + for i,sublayer in enumerate(layer): + self._build_sub(sublayer, f"{name}_{i}") + elif isinstance (layer, dict): + for subname in layer.keys(): + sublayer = layer[subname] + self._build_sub(sublayer, f"{name}_{subname}") + elif isinstance (layer, nn.LayerBase) or \ + isinstance (layer, ModelBase): + + if layer.name is None: + layer.name = name + + if isinstance (layer, nn.LayerBase): + with tf.variable_scope(layer.name): + layer.build_weights() + elif isinstance (layer, ModelBase): + layer.build() + + self.layers.append (layer) + self.layers_by_name[layer.name] = layer + + def xor_list(self, lst1, lst2): + return [value for value in lst1+lst2 if (value not in lst1) or (value not in lst2) ] + + def build(self): + with tf.variable_scope(self.name): + + current_vars = [] + generator = None + while True: + + if generator is None: + generator = self.on_build(*self.args, **self.kwargs) + if not isinstance(generator, types.GeneratorType): + generator = None + + if generator is not None: + try: + next(generator) + except StopIteration: + generator = None + + v = vars(self) + new_vars = self.xor_list (current_vars, list(v.keys()) ) + + for name in new_vars: + self._build_sub(v[name],name) + + current_vars += new_vars + + if generator is None: + break + + self.built = True + + #override + def get_weights(self): + if not self.built: + self.build() + + weights = [] + for layer in self.layers: + weights += layer.get_weights() + return weights + + def get_layer_by_name(self, name): + return self.layers_by_name.get(name, None) + + def get_layers(self): + if not self.built: + self.build() + layers = [] + for layer in self.layers: + if isinstance (layer, nn.LayerBase): + layers.append(layer) + else: + layers += layer.get_layers() + return layers + + #override + def on_build(self, *args, **kwargs): + """ + init model layers here + + return 'yield' if build is not finished + therefore dependency models will be initialized + """ + pass + + #override + def forward(self, *args, **kwargs): + #flow layers/models/tensors here + pass + + def __call__(self, *args, **kwargs): + if not self.built: + self.build() + + return self.forward(*args, **kwargs) + + # def compute_output_shape(self, shapes): + # if not self.built: + # self.build() + + # not_list = False + # if not isinstance(shapes, list): + # not_list = True + # shapes = [shapes] + + # with tf.device('/CPU:0'): + # # CPU tensors will not impact any performance, only slightly RAM "leakage" + # phs = [] + # for dtype,sh in shapes: + # phs += [ tf.placeholder(dtype, sh) ] + + # result = self.__call__(phs[0] if not_list else phs) + + # if not isinstance(result, list): + # result = [result] + + # result_shapes = [] + + # for t in result: + # result_shapes += [ t.shape.as_list() ] + + # return result_shapes[0] if not_list else result_shapes + + def build_for_run(self, shapes_list): + if not isinstance(shapes_list, list): + raise ValueError("shapes_list must be a list.") + + self.run_placeholders = [] + for dtype,sh in shapes_list: + self.run_placeholders.append ( tf.placeholder(dtype, sh) ) + + self.run_output = self.__call__(self.run_placeholders) + + def run (self, inputs): + if self.run_placeholders is None: + raise Exception ("Model didn't build for run.") + + if len(inputs) != 
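# Hypothetical usage of the build_for_run()/run() pair in ModelBase above, assuming
# nn.initialize() has already been called and SomeModel is an nn.ModelBase subclass
# defined elsewhere; kept as comments because it depends on the rest of the package.
# model = SomeModel(name='some_model')
# model.build_for_run([(nn.tf.float32, nn.get4Dshape(256, 256, 3))])   # one placeholder
# model.init_weights()
# out = model.run([np.zeros((1, 256, 256, 3), np.float32)])            # feeds the placeholder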
len(self.run_placeholders): + raise ValueError("len(inputs) != self.run_placeholders") + + feed_dict = {} + for ph, inp in zip(self.run_placeholders, inputs): + feed_dict[ph] = inp + + return nn.tf_sess.run ( self.run_output, feed_dict=feed_dict) + + def summary(self): + layers = self.get_layers() + layers_names = [] + layers_params = [] + + max_len_str = 0 + max_len_param_str = 0 + delim_str = "-" + + total_params = 0 + + #Get layers names and str lenght for delim + for l in layers: + if len(str(l))>max_len_str: + max_len_str = len(str(l)) + layers_names+=[str(l).capitalize()] + + #Get params for each layer + layers_params = [ int(np.sum(np.prod(w.shape) for w in l.get_weights())) for l in layers ] + total_params = np.sum(layers_params) + + #Get str lenght for delim + for p in layers_params: + if len(str(p))>max_len_param_str: + max_len_param_str=len(str(p)) + + #Set delim + for i in range(max_len_str+max_len_param_str+3): + delim_str += "-" + + output = "\n"+delim_str+"\n" + + #Format model name str + model_name_str = "| "+self.name.capitalize() + len_model_name_str = len(model_name_str) + for i in range(len(delim_str)-len_model_name_str): + model_name_str+= " " if i!=(len(delim_str)-len_model_name_str-2) else " |" + + output += model_name_str +"\n" + output += delim_str +"\n" + + + #Format layers table + for i in range(len(layers_names)): + output += delim_str +"\n" + + l_name = layers_names[i] + l_param = str(layers_params[i]) + l_param_str = "" + if len(l_name)<=max_len_str: + for i in range(max_len_str - len(l_name)): + l_name+= " " + + if len(l_param)<=max_len_param_str: + for i in range(max_len_param_str - len(l_param)): + l_param_str+= " " + + l_param_str += l_param + + + output +="| "+l_name+"|"+l_param_str+"| \n" + + output += delim_str +"\n" + + #Format sum of params + total_params_str = "| Total params count: "+str(total_params) + len_total_params_str = len(total_params_str) + for i in range(len(delim_str)-len_total_params_str): + total_params_str+= " " if i!=(len(delim_str)-len_total_params_str-2) else " |" + + output += total_params_str +"\n" + output += delim_str +"\n" + + # io.log_info(output) + +nn.ModelBase = ModelBase diff --git a/face_detect/core/leras/models/PatchDiscriminator.py b/face_detect/core/leras/models/PatchDiscriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..c124c77a0fa5fe2ed331b622d90929bb5fdcd665 --- /dev/null +++ b/face_detect/core/leras/models/PatchDiscriminator.py @@ -0,0 +1,197 @@ +import numpy as np +from face_feature.core.leras import nn +tf = nn.tf + +patch_discriminator_kernels = \ + { 1 : (512, [ [1,1] ]), + 2 : (512, [ [2,1] ]), + 3 : (512, [ [2,1], [2,1] ]), + 4 : (512, [ [2,2], [2,2] ]), + 5 : (512, [ [3,2], [2,2] ]), + 6 : (512, [ [4,2], [2,2] ]), + 7 : (512, [ [3,2], [3,2] ]), + 8 : (512, [ [4,2], [3,2] ]), + 9 : (512, [ [3,2], [4,2] ]), + 10 : (512, [ [4,2], [4,2] ]), + 11 : (512, [ [3,2], [3,2], [2,1] ]), + 12 : (512, [ [4,2], [3,2], [2,1] ]), + 13 : (512, [ [3,2], [4,2], [2,1] ]), + 14 : (512, [ [4,2], [4,2], [2,1] ]), + 15 : (512, [ [3,2], [3,2], [3,1] ]), + 16 : (512, [ [4,2], [3,2], [3,1] ]), + 17 : (512, [ [3,2], [4,2], [3,1] ]), + 18 : (512, [ [4,2], [4,2], [3,1] ]), + 19 : (512, [ [3,2], [3,2], [4,1] ]), + 20 : (512, [ [4,2], [3,2], [4,1] ]), + 21 : (512, [ [3,2], [4,2], [4,1] ]), + 22 : (512, [ [4,2], [4,2], [4,1] ]), + 23 : (256, [ [3,2], [3,2], [3,2], [2,1] ]), + 24 : (256, [ [4,2], [3,2], [3,2], [2,1] ]), + 25 : (256, [ [3,2], [4,2], [3,2], [2,1] ]), + 26 : (256, [ [4,2], [4,2], [3,2], [2,1] ]), + 
27 : (256, [ [3,2], [4,2], [4,2], [2,1] ]), + 28 : (256, [ [4,2], [3,2], [4,2], [2,1] ]), + 29 : (256, [ [3,2], [4,2], [4,2], [2,1] ]), + 30 : (256, [ [4,2], [4,2], [4,2], [2,1] ]), + 31 : (256, [ [3,2], [3,2], [3,2], [3,1] ]), + 32 : (256, [ [4,2], [3,2], [3,2], [3,1] ]), + 33 : (256, [ [3,2], [4,2], [3,2], [3,1] ]), + 34 : (256, [ [4,2], [4,2], [3,2], [3,1] ]), + 35 : (256, [ [3,2], [4,2], [4,2], [3,1] ]), + 36 : (256, [ [4,2], [3,2], [4,2], [3,1] ]), + 37 : (256, [ [3,2], [4,2], [4,2], [3,1] ]), + 38 : (256, [ [4,2], [4,2], [4,2], [3,1] ]), + 39 : (256, [ [3,2], [3,2], [3,2], [4,1] ]), + 40 : (256, [ [4,2], [3,2], [3,2], [4,1] ]), + 41 : (256, [ [3,2], [4,2], [3,2], [4,1] ]), + 42 : (256, [ [4,2], [4,2], [3,2], [4,1] ]), + 43 : (256, [ [3,2], [4,2], [4,2], [4,1] ]), + 44 : (256, [ [4,2], [3,2], [4,2], [4,1] ]), + 45 : (256, [ [3,2], [4,2], [4,2], [4,1] ]), + 46 : (256, [ [4,2], [4,2], [4,2], [4,1] ]), + } + + +class PatchDiscriminator(nn.ModelBase): + def on_build(self, patch_size, in_ch, base_ch=None, conv_kernel_initializer=None): + suggested_base_ch, kernels_strides = patch_discriminator_kernels[patch_size] + + if base_ch is None: + base_ch = suggested_base_ch + + prev_ch = in_ch + self.convs = [] + for i, (kernel_size, strides) in enumerate(kernels_strides): + cur_ch = base_ch * min( (2**i), 8 ) + + self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=kernel_size, strides=strides, padding='SAME', kernel_initializer=conv_kernel_initializer) ) + prev_ch = cur_ch + + self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + for conv in self.convs: + x = tf.nn.leaky_relu( conv(x), 0.1 ) + return self.out_conv(x) + +nn.PatchDiscriminator = PatchDiscriminator + +class UNetPatchDiscriminator(nn.ModelBase): + """ + Inspired by https://arxiv.org/abs/2002.12655 "A U-Net Based Discriminator for Generative Adversarial Networks" + """ + def calc_receptive_field_size(self, layers): + """ + result the same as https://fomoro.com/research/article/receptive-field-calculatorindex.html + """ + rf = 0 + ts = 1 + for i, (k, s) in enumerate(layers): + if i == 0: + rf = k + else: + rf += (k-1)*ts + ts *= s + return rf + + def find_archi(self, target_patch_size, max_layers=9): + """ + Find the best configuration of layers using only 3x3 convs for target patch size + """ + s = {} + for layers_count in range(1,max_layers+1): + val = 1 << (layers_count-1) + while True: + val -= 1 + + layers = [] + sum_st = 0 + layers.append ( [3, 2]) + sum_st += 2 + for i in range(layers_count-1): + st = 1 + (1 if val & (1 << i) !=0 else 0 ) + layers.append ( [3, st ]) + sum_st += st + + rf = self.calc_receptive_field_size(layers) + + s_rf = s.get(rf, None) + if s_rf is None: + s[rf] = (layers_count, sum_st, layers) + else: + if layers_count < s_rf[0] or \ + ( layers_count == s_rf[0] and sum_st > s_rf[1] ): + s[rf] = (layers_count, sum_st, layers) + + if val == 0: + break + + x = sorted(list(s.keys())) + q=x[np.abs(np.array(x)-target_patch_size).argmin()] + return s[q][2] + + def on_build(self, patch_size, in_ch, base_ch = 16): + + class ResidualBlock(nn.ModelBase): + def on_build(self, ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME') + self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME') + + def forward(self, inp): + x = self.conv1(inp) + x = tf.nn.leaky_relu(x, 0.2) + x = self.conv2(x) + x = tf.nn.leaky_relu(inp + x, 0.2) + return x + + prev_ch = in_ch + self.convs = 
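# Worked example of calc_receptive_field_size() above for three 3x3 convs with strides
# [2, 2, 1] (an assumed configuration from find_archi's search space).
def receptive_field(layers):
    rf, ts = 0, 1
    for i, (k, s) in enumerate(layers):
        rf = k if i == 0 else rf + (k - 1) * ts
        ts *= s
    return rf

print(receptive_field([[3, 2], [3, 2], [3, 1]]))            # 15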
[] + self.res1 = [] + self.res2 = [] + self.upconvs = [] + self.upres1 = [] + self.upres2 = [] + layers = self.find_archi(patch_size) + + level_chs = { i-1:v for i,v in enumerate([ min( base_ch * (2**i), 512 ) for i in range(len(layers)+1)]) } + + self.in_conv = nn.Conv2D( in_ch, level_chs[-1], kernel_size=1, padding='VALID') + + for i, (kernel_size, strides) in enumerate(layers): + self.convs.append ( nn.Conv2D( level_chs[i-1], level_chs[i], kernel_size=kernel_size, strides=strides, padding='SAME') ) + + self.res1.append ( ResidualBlock(level_chs[i]) ) + self.res2.append ( ResidualBlock(level_chs[i]) ) + + self.upconvs.insert (0, nn.Conv2DTranspose( level_chs[i]*(2 if i != len(layers)-1 else 1), level_chs[i-1], kernel_size=kernel_size, strides=strides, padding='SAME') ) + + self.upres1.insert (0, ResidualBlock(level_chs[i-1]*2) ) + self.upres2.insert (0, ResidualBlock(level_chs[i-1]*2) ) + + self.out_conv = nn.Conv2D( level_chs[-1]*2, 1, kernel_size=1, padding='VALID') + + self.center_out = nn.Conv2D( level_chs[len(layers)-1], 1, kernel_size=1, padding='VALID') + self.center_conv = nn.Conv2D( level_chs[len(layers)-1], level_chs[len(layers)-1], kernel_size=1, padding='VALID') + + + def forward(self, x): + x = tf.nn.leaky_relu( self.in_conv(x), 0.2 ) + + encs = [] + for conv, res1,res2 in zip(self.convs, self.res1, self.res2): + encs.insert(0, x) + x = tf.nn.leaky_relu( conv(x), 0.2 ) + x = res1(x) + x = res2(x) + + center_out, x = self.center_out(x), tf.nn.leaky_relu( self.center_conv(x), 0.2 ) + + for i, (upconv, enc, upres1, upres2 ) in enumerate(zip(self.upconvs, encs, self.upres1, self.upres2)): + x = tf.nn.leaky_relu( upconv(x), 0.2 ) + x = tf.concat( [enc, x], axis=nn.conv2d_ch_axis) + x = upres1(x) + x = upres2(x) + + return center_out, self.out_conv(x) + +nn.UNetPatchDiscriminator = UNetPatchDiscriminator diff --git a/face_detect/core/leras/models/XSeg.py b/face_detect/core/leras/models/XSeg.py new file mode 100644 index 0000000000000000000000000000000000000000..b47b105656aa78027c9d6d3f56e9b32dcdff14e5 --- /dev/null +++ b/face_detect/core/leras/models/XSeg.py @@ -0,0 +1,158 @@ +from face_feature.core.leras import nn +tf = nn.tf + +class XSeg(nn.ModelBase): + + def on_build (self, in_ch, base_ch, out_ch): + + class ConvBlock(nn.ModelBase): + def on_build(self, in_ch, out_ch): + self.conv = nn.Conv2D (in_ch, out_ch, kernel_size=3, padding='SAME') + self.frn = nn.FRNorm2D(out_ch) + self.tlu = nn.TLU(out_ch) + + def forward(self, x): + x = self.conv(x) + x = self.frn(x) + x = self.tlu(x) + return x + + class UpConvBlock(nn.ModelBase): + def on_build(self, in_ch, out_ch): + self.conv = nn.Conv2DTranspose (in_ch, out_ch, kernel_size=3, padding='SAME') + self.frn = nn.FRNorm2D(out_ch) + self.tlu = nn.TLU(out_ch) + + def forward(self, x): + x = self.conv(x) + x = self.frn(x) + x = self.tlu(x) + return x + + self.base_ch = base_ch + + self.conv01 = ConvBlock(in_ch, base_ch) + self.conv02 = ConvBlock(base_ch, base_ch) + self.bp0 = nn.BlurPool (filt_size=4) + + self.conv11 = ConvBlock(base_ch, base_ch*2) + self.conv12 = ConvBlock(base_ch*2, base_ch*2) + self.bp1 = nn.BlurPool (filt_size=3) + + self.conv21 = ConvBlock(base_ch*2, base_ch*4) + self.conv22 = ConvBlock(base_ch*4, base_ch*4) + self.bp2 = nn.BlurPool (filt_size=2) + + self.conv31 = ConvBlock(base_ch*4, base_ch*8) + self.conv32 = ConvBlock(base_ch*8, base_ch*8) + self.conv33 = ConvBlock(base_ch*8, base_ch*8) + self.bp3 = nn.BlurPool (filt_size=2) + + self.conv41 = ConvBlock(base_ch*8, base_ch*8) + self.conv42 = ConvBlock(base_ch*8, 
base_ch*8) + self.conv43 = ConvBlock(base_ch*8, base_ch*8) + self.bp4 = nn.BlurPool (filt_size=2) + + self.conv51 = ConvBlock(base_ch*8, base_ch*8) + self.conv52 = ConvBlock(base_ch*8, base_ch*8) + self.conv53 = ConvBlock(base_ch*8, base_ch*8) + self.bp5 = nn.BlurPool (filt_size=2) + + self.dense1 = nn.Dense ( 4*4* base_ch*8, 512) + self.dense2 = nn.Dense ( 512, 4*4* base_ch*8) + + self.up5 = UpConvBlock (base_ch*8, base_ch*4) + self.uconv53 = ConvBlock(base_ch*12, base_ch*8) + self.uconv52 = ConvBlock(base_ch*8, base_ch*8) + self.uconv51 = ConvBlock(base_ch*8, base_ch*8) + + self.up4 = UpConvBlock (base_ch*8, base_ch*4) + self.uconv43 = ConvBlock(base_ch*12, base_ch*8) + self.uconv42 = ConvBlock(base_ch*8, base_ch*8) + self.uconv41 = ConvBlock(base_ch*8, base_ch*8) + + self.up3 = UpConvBlock (base_ch*8, base_ch*4) + self.uconv33 = ConvBlock(base_ch*12, base_ch*8) + self.uconv32 = ConvBlock(base_ch*8, base_ch*8) + self.uconv31 = ConvBlock(base_ch*8, base_ch*8) + + self.up2 = UpConvBlock (base_ch*8, base_ch*4) + self.uconv22 = ConvBlock(base_ch*8, base_ch*4) + self.uconv21 = ConvBlock(base_ch*4, base_ch*4) + + self.up1 = UpConvBlock (base_ch*4, base_ch*2) + self.uconv12 = ConvBlock(base_ch*4, base_ch*2) + self.uconv11 = ConvBlock(base_ch*2, base_ch*2) + + self.up0 = UpConvBlock (base_ch*2, base_ch) + self.uconv02 = ConvBlock(base_ch*2, base_ch) + self.uconv01 = ConvBlock(base_ch, base_ch) + self.out_conv = nn.Conv2D (base_ch, out_ch, kernel_size=3, padding='SAME') + + + def forward(self, inp): + x = inp + + x = self.conv01(x) + x = x0 = self.conv02(x) + x = self.bp0(x) + + x = self.conv11(x) + x = x1 = self.conv12(x) + x = self.bp1(x) + + x = self.conv21(x) + x = x2 = self.conv22(x) + x = self.bp2(x) + + x = self.conv31(x) + x = self.conv32(x) + x = x3 = self.conv33(x) + x = self.bp3(x) + + x = self.conv41(x) + x = self.conv42(x) + x = x4 = self.conv43(x) + x = self.bp4(x) + + x = self.conv51(x) + x = self.conv52(x) + x = x5 = self.conv53(x) + x = self.bp5(x) + + x = nn.flatten(x) + x = self.dense1(x) + x = self.dense2(x) + x = nn.reshape_4D (x, 4, 4, self.base_ch*8 ) + + x = self.up5(x) + x = self.uconv53(tf.concat([x,x5],axis=nn.conv2d_ch_axis)) + x = self.uconv52(x) + x = self.uconv51(x) + + x = self.up4(x) + x = self.uconv43(tf.concat([x,x4],axis=nn.conv2d_ch_axis)) + x = self.uconv42(x) + x = self.uconv41(x) + + x = self.up3(x) + x = self.uconv33(tf.concat([x,x3],axis=nn.conv2d_ch_axis)) + x = self.uconv32(x) + x = self.uconv31(x) + + x = self.up2(x) + x = self.uconv22(tf.concat([x,x2],axis=nn.conv2d_ch_axis)) + x = self.uconv21(x) + + x = self.up1(x) + x = self.uconv12(tf.concat([x,x1],axis=nn.conv2d_ch_axis)) + x = self.uconv11(x) + + x = self.up0(x) + x = self.uconv02(tf.concat([x,x0],axis=nn.conv2d_ch_axis)) + x = self.uconv01(x) + + logits = self.out_conv(x) + return logits, tf.nn.sigmoid(logits) + +nn.XSeg = XSeg \ No newline at end of file diff --git a/face_detect/core/leras/models/__init__.py b/face_detect/core/leras/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f7e5456f74106a0f78268a7bded82eeefee83b4 --- /dev/null +++ b/face_detect/core/leras/models/__init__.py @@ -0,0 +1,4 @@ +from .ModelBase import * +from .PatchDiscriminator import * +from .CodeDiscriminator import * +from .XSeg import * \ No newline at end of file diff --git a/face_detect/core/leras/nn.py b/face_detect/core/leras/nn.py new file mode 100644 index 0000000000000000000000000000000000000000..48fe01337f41b5de6404c0f647018ea94f51c2c2 --- /dev/null +++ 
b/face_detect/core/leras/nn.py @@ -0,0 +1,300 @@ +""" +Leras. + +like lighter keras. +This is my lightweight neural network library written from scratch +based on pure tensorflow without keras. + +Provides: ++ full freedom of tensorflow operations without keras model's restrictions ++ easy model operations like in PyTorch, but in graph mode (no eager execution) ++ convenient and understandable logic + +Reasons why we cannot import tensorflow or any tensorflow.sub modules right here: +1) program is changing env variables based on DeviceConfig before import tensorflow +2) multiprocesses will import tensorflow every spawn + +NCHW speed up training for 10-20%. +""" + +import os +import sys +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) +from pathlib import Path +import numpy as np +# from face_feature.core.interact import interact as io +from .device import Devices + + +class nn(): + current_DeviceConfig = None + + tf = None + tf_sess = None + tf_sess_config = None + tf_default_device_name = None + + data_format = None + conv2d_ch_axis = None + conv2d_spatial_axes = None + + floatx = None + + @staticmethod + def initialize(device_config=None, floatx="float32", data_format="NHWC"): + + if nn.tf is None: + if device_config is None: + device_config = nn.getCurrentDeviceConfig() + nn.setCurrentDeviceConfig(device_config) + + # Manipulate environment variables before import tensorflow + + first_run = False + # if len(device_config.devices) != 0: + # if sys.platform[0:3] == 'win': + # # Windows specific env vars + # if all( [ x.name == device_config.devices[0].name for x in device_config.devices ] ): + # devices_str = "_" + device_config.devices[0].name.replace(' ','_') + # else: + # devices_str = "" + # for device in device_config.devices: + # devices_str += "_" + device.name.replace(' ','_') + # + # compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str) + # if not compute_cache_path.exists(): + # first_run = True + # compute_cache_path.mkdir(parents=True, exist_ok=True) + # os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path) + + # if first_run: + # io.log_info("Caching GPU kernels...") + + import tensorflow + + tf_version = tensorflow.version.VERSION + #if tf_version is None: + # tf_version = tensorflow.version.GIT_VERSION + if tf_version[0] == 'v': + tf_version = tf_version[1:] + if tf_version[0] == '2': + tf = tensorflow.compat.v1 + else: + tf = tensorflow + + import logging + # Disable tensorflow warnings + tf_logger = logging.getLogger('tensorflow') + tf_logger.setLevel(logging.ERROR) + + if tf_version[0] == '2': + tf.disable_v2_behavior() + nn.tf = tf + + # Initialize framework + # import core.leras.ops + # import core.leras.layers + # import core.leras.initializers + # import core.leras.optimizers + # import core.leras.models + # import core.leras.archis + + # Configure tensorflow session-config + if len(device_config.devices) == 0: + config = tf.ConfigProto(device_count={'GPU': 0}) + nn.tf_default_device_name = '/CPU:0' + else: + nn.tf_default_device_name = f'/{device_config.devices[0].tf_dev_type}:0' + + config = tf.ConfigProto() + config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices]) + + config.gpu_options.force_gpu_compatible = True + config.gpu_options.allow_growth = True + nn.tf_sess_config = config + + if nn.tf_sess is None: + nn.tf_sess = tf.Session(config=nn.tf_sess_config) + + if floatx == "float32": + floatx = nn.tf.float32 + elif floatx == "float16": + 
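# Hypothetical one-time setup with the nn class above; kept as comments because it
# requires the full leras package and (optionally) a visible GPU.
# nn.initialize_main_env()
# nn.initialize(device_config=nn.DeviceConfig.BestGPU(), floatx="float32", data_format="NHWC")
# print(nn.tf_default_device_name)   # '/GPU:0' with a GPU, otherwise '/CPU:0'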
floatx = nn.tf.float16 + else: + raise ValueError(f"unsupported floatx {floatx}") + nn.set_floatx(floatx) + nn.set_data_format(data_format) + + @staticmethod + def initialize_main_env(): + Devices.initialize_main_env() + + @staticmethod + def set_floatx(tf_dtype): + """ + set default float type for all layers when dtype is None for them + """ + nn.floatx = tf_dtype + + @staticmethod + def set_data_format(data_format): + if data_format != "NHWC" and data_format != "NCHW": + raise ValueError(f"unsupported data_format {data_format}") + nn.data_format = data_format + + if data_format == "NHWC": + nn.conv2d_ch_axis = 3 + nn.conv2d_spatial_axes = [1,2] + elif data_format == "NCHW": + nn.conv2d_ch_axis = 1 + nn.conv2d_spatial_axes = [2,3] + + @staticmethod + def get4Dshape ( w, h, c ): + """ + returns 4D shape based on current data_format + """ + if nn.data_format == "NHWC": + return (None,h,w,c) + else: + return (None,c,h,w) + + @staticmethod + def to_data_format( x, to_data_format, from_data_format): + if to_data_format == from_data_format: + return x + + if to_data_format == "NHWC": + return np.transpose(x, (0,2,3,1) ) + elif to_data_format == "NCHW": + return np.transpose(x, (0,3,1,2) ) + else: + raise ValueError(f"unsupported to_data_format {to_data_format}") + + @staticmethod + def getCurrentDeviceConfig(): + if nn.current_DeviceConfig is None: + nn.current_DeviceConfig = nn.DeviceConfig.BestGPU() + return nn.current_DeviceConfig + + @staticmethod + def setCurrentDeviceConfig(device_config): + nn.current_DeviceConfig = device_config + + @staticmethod + def reset_session(): + if nn.tf is not None: + if nn.tf_sess is not None: + nn.tf.reset_default_graph() + nn.tf_sess.close() + nn.tf_sess = nn.tf.Session(config=nn.tf_sess_config) + + @staticmethod + def close_session(): + if nn.tf_sess is not None: + nn.tf.reset_default_graph() + nn.tf_sess.close() + nn.tf_sess = None + + @staticmethod + # def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False): + # devices = Devices.getDevices() + # if len(devices) == 0: + # return [] + # + # all_devices_indexes = [device.index for device in devices] + # + # if choose_only_one: + # suggest_best_multi_gpu = False + # suggest_all_gpu = False + # + # if suggest_all_gpu: + # best_device_indexes = all_devices_indexes + # elif suggest_best_multi_gpu: + # best_device_indexes = [device.index for device in devices.get_equal_devices(devices.get_best_device()) ] + # else: + # best_device_indexes = [ devices.get_best_device().index ] + # best_device_indexes = ",".join([str(x) for x in best_device_indexes]) + # + # io.log_info ("") + # if choose_only_one: + # io.log_info ("Choose one GPU idx.") + # else: + # io.log_info ("Choose one or several GPU idxs (separated by comma).") + # io.log_info ("") + # + # if allow_cpu: + # io.log_info ("[CPU] : CPU") + # for device in devices: + # io.log_info (f" [{device.index}] : {device.name}") + # + # io.log_info ("") + # + # while True: + # try: + # if choose_only_one: + # choosed_idxs = io.input_str("Which GPU index to choose?", best_device_indexes) + # else: + # choosed_idxs = io.input_str("Which GPU indexes to choose?", best_device_indexes) + # + # if allow_cpu and choosed_idxs.lower() == "cpu": + # choosed_idxs = [] + # break + # + # choosed_idxs = [ int(x) for x in choosed_idxs.split(',') ] + # + # if choose_only_one: + # if len(choosed_idxs) == 1: + # break + # else: + # if all( [idx in all_devices_indexes for idx in choosed_idxs] ): + # break + # except: + # pass 
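# Numpy sketch of the data-format helpers above (assumed NCHW input): to_data_format()
# is a pure transpose, and get4Dshape() returns (None, h, w, c) under NHWC.
import numpy as np

x_nchw = np.zeros((1, 3, 64, 64), np.float32)
x_nhwc = np.transpose(x_nchw, (0, 2, 3, 1))                 # to_data_format(x, "NHWC", "NCHW")
print(x_nhwc.shape)                                         # (1, 64, 64, 3)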
+ # io.log_info ("") + # + # return choosed_idxs + + class DeviceConfig(): + @staticmethod + def ask_choose_device(*args, **kwargs): + return nn.DeviceConfig.GPUIndexes( nn.ask_choose_device_idxs(*args,**kwargs) ) + + def __init__ (self, devices=None): + devices = devices or [] + + if not isinstance(devices, Devices): + devices = Devices(devices) + + self.devices = devices + self.cpu_only = len(devices) == 0 + + @staticmethod + def BestGPU(): + devices = Devices.getDevices() + if len(devices) == 0: + return nn.DeviceConfig.CPU() + + return nn.DeviceConfig([devices.get_best_device()]) + + @staticmethod + def WorstGPU(): + devices = Devices.getDevices() + if len(devices) == 0: + return nn.DeviceConfig.CPU() + + return nn.DeviceConfig([devices.get_worst_device()]) + + @staticmethod + def GPUIndexes(indexes): + if len(indexes) != 0: + devices = Devices.getDevices().get_devices_from_index_list(indexes) + else: + devices = [] + + return nn.DeviceConfig(devices) + + @staticmethod + def CPU(): + return nn.DeviceConfig([]) diff --git a/face_detect/core/leras/ops/__init__.py b/face_detect/core/leras/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ec2ba78dc5cf01a4207b9fccada47354b3404414 --- /dev/null +++ b/face_detect/core/leras/ops/__init__.py @@ -0,0 +1,405 @@ +import numpy as np +from face_feature.core.leras import nn +tf = nn.tf +from tensorflow.python.ops import array_ops, random_ops, math_ops, sparse_ops, gradients +from tensorflow.python.framework import sparse_tensor + +def tf_get_value(tensor): + return nn.tf_sess.run (tensor) +nn.tf_get_value = tf_get_value + + +def batch_set_value(tuples): + if len(tuples) != 0: + with nn.tf.device('/CPU:0'): + assign_ops = [] + feed_dict = {} + + for x, value in tuples: + if isinstance(value, nn.tf.Operation) or \ + isinstance(value, nn.tf.Variable): + assign_ops.append(value) + else: + value = np.asarray(value, dtype=x.dtype.as_numpy_dtype) + assign_placeholder = nn.tf.placeholder( x.dtype.base_dtype, shape=[None]*value.ndim ) + assign_op = nn.tf.assign (x, assign_placeholder ) + assign_ops.append(assign_op) + feed_dict[assign_placeholder] = value + + nn.tf_sess.run(assign_ops, feed_dict=feed_dict) +nn.batch_set_value = batch_set_value + +def init_weights(weights): + ops = [] + + ca_tuples_w = [] + ca_tuples = [] + for w in weights: + initializer = w.initializer + for input in initializer.inputs: + if "_cai_" in input.name: + ca_tuples_w.append (w) + ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) ) + break + else: + ops.append (initializer) + + if len(ops) != 0: + nn.tf_sess.run (ops) + + if len(ca_tuples) != 0: + nn.batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] ) +nn.init_weights = init_weights + +def tf_gradients ( loss, vars ): + grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True ) + gv = [*zip(grads,vars)] + for g,v in gv: + if g is None: + raise Exception(f"Variable {v.name} is declared as trainable, but no tensors flow through it.") + return gv +nn.gradients = tf_gradients + +def average_gv_list(grad_var_list, tf_device_string=None): + if len(grad_var_list) == 1: + return grad_var_list[0] + + e = tf.device(tf_device_string) if tf_device_string is not None else None + if e is not None: e.__enter__() + result = [] + for i, (gv) in enumerate(grad_var_list): + for j,(g,v) in enumerate(gv): + g = tf.expand_dims(g, 0) + if i == 0: + result += [ [[g], v] ] + else: + result[j][0] += [g] + + for i,(gs,v) in enumerate(result): + result[i] = ( 
tf.reduce_mean( tf.concat (gs, 0), 0 ), v ) + if e is not None: e.__exit__(None,None,None) + return result +nn.average_gv_list = average_gv_list + +def average_tensor_list(tensors_list, tf_device_string=None): + if len(tensors_list) == 1: + return tensors_list[0] + + e = tf.device(tf_device_string) if tf_device_string is not None else None + if e is not None: e.__enter__() + result = tf.reduce_mean(tf.concat ([tf.expand_dims(t, 0) for t in tensors_list], 0), 0) + if e is not None: e.__exit__(None,None,None) + return result +nn.average_tensor_list = average_tensor_list + +def concat (tensors_list, axis): + """ + Better version. + """ + if len(tensors_list) == 1: + return tensors_list[0] + return tf.concat(tensors_list, axis) +nn.concat = concat + +def gelu(x): + cdf = 0.5 * (1.0 + tf.nn.tanh((np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) + return x * cdf +nn.gelu = gelu + +def upsample2d(x, size=2): + if nn.data_format == "NCHW": + b,c,h,w = x.shape.as_list() + x = tf.reshape (x, (-1,c,h,1,w,1) ) + x = tf.tile(x, (1,1,1,size,1,size) ) + x = tf.reshape (x, (-1,c,h*size,w*size) ) + return x + else: + return tf.image.resize_nearest_neighbor(x, (x.shape[1]*size, x.shape[2]*size) ) +nn.upsample2d = upsample2d + +def resize2d_bilinear(x, size=2): + h = x.shape[nn.conv2d_spatial_axes[0]].value + w = x.shape[nn.conv2d_spatial_axes[1]].value + + if nn.data_format == "NCHW": + x = tf.transpose(x, (0,2,3,1)) + + if size > 0: + new_size = (h*size,w*size) + else: + new_size = (h//-size,w//-size) + + x = tf.image.resize(x, new_size, method=tf.image.ResizeMethod.BILINEAR) + + if nn.data_format == "NCHW": + x = tf.transpose(x, (0,3,1,2)) + + return x +nn.resize2d_bilinear = resize2d_bilinear + +def resize2d_nearest(x, size=2): + if size in [-1,0,1]: + return x + + + if size > 0: + raise Exception("") + else: + if nn.data_format == "NCHW": + x = x[:,:,::-size,::-size] + else: + x = x[:,::-size,::-size,:] + return x + + h = x.shape[nn.conv2d_spatial_axes[0]].value + w = x.shape[nn.conv2d_spatial_axes[1]].value + + if nn.data_format == "NCHW": + x = tf.transpose(x, (0,2,3,1)) + + if size > 0: + new_size = (h*size,w*size) + else: + new_size = (h//-size,w//-size) + + x = tf.image.resize(x, new_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + + if nn.data_format == "NCHW": + x = tf.transpose(x, (0,3,1,2)) + + return x +nn.resize2d_nearest = resize2d_nearest + +def flatten(x): + if nn.data_format == "NHWC": + # match NCHW version in order to switch data_format without problems + x = tf.transpose(x, (0,3,1,2) ) + return tf.reshape (x, (-1, np.prod(x.shape[1:])) ) + +nn.flatten = flatten + +def max_pool(x, kernel_size=2, strides=2): + if nn.data_format == "NHWC": + return tf.nn.max_pool(x, [1,kernel_size,kernel_size,1], [1,strides,strides,1], 'SAME', data_format=nn.data_format) + else: + return tf.nn.max_pool(x, [1,1,kernel_size,kernel_size], [1,1,strides,strides], 'SAME', data_format=nn.data_format) + +nn.max_pool = max_pool + +def reshape_4D(x, w,h,c): + if nn.data_format == "NHWC": + # match NCHW version in order to switch data_format without problems + x = tf.reshape (x, (-1,c,h,w)) + x = tf.transpose(x, (0,2,3,1) ) + return x + else: + return tf.reshape (x, (-1,c,h,w)) +nn.reshape_4D = reshape_4D + +def random_binomial(shape, p=0.0, dtype=None, seed=None): + if dtype is None: + dtype=tf.float32 + + if seed is None: + seed = np.random.randint(10e6) + return array_ops.where( + random_ops.random_uniform(shape, dtype=tf.float16, seed=seed) < p, + array_ops.ones(shape, dtype=dtype), 
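# Numpy check of the tanh-based GELU approximation used by nn.gelu above.
import numpy as np

def gelu(x):
    return x * 0.5 * (1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x ** 3)))

print(gelu(np.array([-1.0, 0.0, 1.0])))                     # approx [-0.159  0.     0.841]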
array_ops.zeros(shape, dtype=dtype)) +nn.random_binomial = random_binomial + +def gaussian_blur(input, radius=2.0): + def gaussian(x, mu, sigma): + return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) + + def make_kernel(sigma): + kernel_size = max(3, int(2 * 2 * sigma + 1)) + mean = np.floor(0.5 * kernel_size) + kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)]) + np_kernel = np.outer(kernel_1d, kernel_1d).astype(np.float32) + kernel = np_kernel / np.sum(np_kernel) + return kernel, kernel_size + + gauss_kernel, kernel_size = make_kernel(radius) + padding = kernel_size//2 + if padding != 0: + if nn.data_format == "NHWC": + padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ] + else: + padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ] + else: + padding = None + gauss_kernel = gauss_kernel[:,:,None,None] + + x = input + k = tf.tile (gauss_kernel, (1,1,x.shape[nn.conv2d_ch_axis],1) ) + x = tf.pad(x, padding ) + x = tf.nn.depthwise_conv2d(x, k, strides=[1,1,1,1], padding='VALID', data_format=nn.data_format) + return x +nn.gaussian_blur = gaussian_blur + +def style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1): + def sd(content, style, loss_weight): + content_nc = content.shape[ nn.conv2d_ch_axis ] + style_nc = style.shape[nn.conv2d_ch_axis] + if content_nc != style_nc: + raise Exception("style_loss() content_nc != style_nc") + c_mean, c_var = tf.nn.moments(content, axes=nn.conv2d_spatial_axes, keep_dims=True) + s_mean, s_var = tf.nn.moments(style, axes=nn.conv2d_spatial_axes, keep_dims=True) + c_std, s_std = tf.sqrt(c_var + 1e-5), tf.sqrt(s_var + 1e-5) + mean_loss = tf.reduce_sum(tf.square(c_mean-s_mean), axis=[1,2,3]) + std_loss = tf.reduce_sum(tf.square(c_std-s_std), axis=[1,2,3]) + return (mean_loss + std_loss) * ( loss_weight / content_nc.value ) + + if gaussian_blur_radius > 0.0: + target = gaussian_blur(target, gaussian_blur_radius) + style = gaussian_blur(style, gaussian_blur_radius) + + return sd( target, style, loss_weight=loss_weight ) + +nn.style_loss = style_loss + +def dssim(img1,img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): + if img1.dtype != img2.dtype: + raise ValueError("img1.dtype != img2.dtype") + + not_float32 = img1.dtype != tf.float32 + + if not_float32: + img_dtype = img1.dtype + img1 = tf.cast(img1, tf.float32) + img2 = tf.cast(img2, tf.float32) + + filter_size = max(1, filter_size) + + kernel = np.arange(0, filter_size, dtype=np.float32) + kernel -= (filter_size - 1 ) / 2.0 + kernel = kernel**2 + kernel *= ( -0.5 / (filter_sigma**2) ) + kernel = np.reshape (kernel, (1,-1)) + np.reshape(kernel, (-1,1) ) + kernel = tf.constant ( np.reshape (kernel, (1,-1)), dtype=tf.float32 ) + kernel = tf.nn.softmax(kernel) + kernel = tf.reshape (kernel, (filter_size, filter_size, 1, 1)) + kernel = tf.tile (kernel, (1,1, img1.shape[ nn.conv2d_ch_axis ] ,1)) + + def reducer(x): + return tf.nn.depthwise_conv2d(x, kernel, strides=[1,1,1,1], padding='VALID', data_format=nn.data_format) + + c1 = (k1 * max_val) ** 2 + c2 = (k2 * max_val) ** 2 + + mean0 = reducer(img1) + mean1 = reducer(img2) + num0 = mean0 * mean1 * 2.0 + den0 = tf.square(mean0) + tf.square(mean1) + luminance = (num0 + c1) / (den0 + c1) + + num1 = reducer(img1 * img2) * 2.0 + den1 = reducer(tf.square(img1) + tf.square(img2)) + c2 *= 1.0 #compensation factor + cs = (num1 - num0 + c2) / (den1 - den0 + c2) + + ssim_val = tf.reduce_mean(luminance * cs, axis=nn.conv2d_spatial_axes ) + dssim = (1.0 - ssim_val ) / 2.0 + 
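# Numpy sketch of make_kernel() inside gaussian_blur() above: a normalized 2-D Gaussian
# with kernel_size = max(3, int(2*2*sigma + 1)) taps (radius 2.0 assumed).
import numpy as np

sigma = 2.0
ksize = max(3, int(2 * 2 * sigma + 1))                      # 9
xs = np.arange(ksize, dtype=np.float32)
k1d = np.exp(-(xs - np.floor(0.5 * ksize)) ** 2 / (2 * sigma ** 2))
k2d = np.outer(k1d, k1d)
k2d /= k2d.sum()                                            # normalized, as in the op
print(k2d.shape, round(float(k2d.sum()), 6))                # (9, 9) 1.0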
+ if not_float32: + dssim = tf.cast(dssim, img_dtype) + return dssim + +nn.dssim = dssim + +def space_to_depth(x, size): + if nn.data_format == "NHWC": + # match NCHW version in order to switch data_format without problems + b,h,w,c = x.shape.as_list() + oh, ow = h // size, w // size + x = tf.reshape(x, (-1, size, oh, size, ow, c)) + x = tf.transpose(x, (0, 2, 4, 1, 3, 5)) + x = tf.reshape(x, (-1, oh, ow, size* size* c )) + return x + else: + return tf.space_to_depth(x, size, data_format=nn.data_format) +nn.space_to_depth = space_to_depth + +def depth_to_space(x, size): + if nn.data_format == "NHWC": + # match NCHW version in order to switch data_format without problems + + b,h,w,c = x.shape.as_list() + oh, ow = h * size, w * size + oc = c // (size * size) + + x = tf.reshape(x, (-1, h, w, size, size, oc, ) ) + x = tf.transpose(x, (0, 1, 3, 2, 4, 5)) + x = tf.reshape(x, (-1, oh, ow, oc, )) + return x + else: + cfg = nn.getCurrentDeviceConfig() + if not cfg.cpu_only: + return tf.depth_to_space(x, size, data_format=nn.data_format) + b,c,h,w = x.shape.as_list() + oh, ow = h * size, w * size + oc = c // (size * size) + + x = tf.reshape(x, (-1, size, size, oc, h, w, ) ) + x = tf.transpose(x, (0, 3, 4, 1, 5, 2)) + x = tf.reshape(x, (-1, oc, oh, ow)) + return x +nn.depth_to_space = depth_to_space + +def rgb_to_lab(srgb): + srgb_pixels = tf.reshape(srgb, [-1, 3]) + linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32) + exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32) + rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask + rgb_to_xyz = tf.constant([ + # X Y Z + [0.412453, 0.212671, 0.019334], # R + [0.357580, 0.715160, 0.119193], # G + [0.180423, 0.072169, 0.950227], # B + ]) + xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz) + + xyz_normalized_pixels = tf.multiply(xyz_pixels, [1/0.950456, 1.0, 1/1.088754]) + + epsilon = 6/29 + linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32) + exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32) + fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4/29) * linear_mask + (xyz_normalized_pixels ** (1/3)) * exponential_mask + + fxfyfz_to_lab = tf.constant([ + # l a b + [ 0.0, 500.0, 0.0], # fx + [116.0, -500.0, 200.0], # fy + [ 0.0, 0.0, -200.0], # fz + ]) + lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0]) + return tf.reshape(lab_pixels, tf.shape(srgb)) +nn.rgb_to_lab = rgb_to_lab + +def total_variation_mse(images): + """ + Same as generic total_variation, but MSE diff instead of MAE + """ + pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :] + pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :] + + tot_var = ( tf.reduce_sum(tf.square(pixel_dif1), axis=[1,2,3]) + + tf.reduce_sum(tf.square(pixel_dif2), axis=[1,2,3]) ) + return tot_var +nn.total_variation_mse = total_variation_mse + + +def pixel_norm(x, axes): + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=axes, keepdims=True) + 1e-06) +nn.pixel_norm = pixel_norm + +""" +def tf_suppress_lower_mean(t, eps=0.00001): + if t.shape.ndims != 1: + raise ValueError("tf_suppress_lower_mean: t rank must be 1") + t_mean_eps = tf.reduce_mean(t) - eps + q = tf.clip_by_value(t, t_mean_eps, tf.reduce_max(t) ) + q = tf.clip_by_value(q-t_mean_eps, 0, eps) + q = q * (t/eps) + return q +""" \ No newline at end of file diff --git a/face_detect/core/leras/optimizers/AdaBelief.py b/face_detect/core/leras/optimizers/AdaBelief.py new file 
mode 100644 index 0000000000000000000000000000000000000000..8d8283b26c664bd5763016483642b13a4544cdf7 --- /dev/null +++ b/face_detect/core/leras/optimizers/AdaBelief.py @@ -0,0 +1,80 @@ +from tensorflow.python.ops import control_flow_ops, state_ops +from face_feature.core.leras import nn +tf = nn.tf + +class AdaBelief(nn.OptimizerBase): + def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, lr_dropout=1.0, lr_cos=0, epsilon=1e-7, clipnorm=0.0, name=None, **kwargs): + super().__init__(name=name) + + if name is None: + raise ValueError('name must be defined.') + + self.lr = lr + self.beta_1 = beta_1 + self.beta_2 = beta_2 + self.lr_dropout = lr_dropout + self.lr_cos = lr_cos + self.clipnorm = clipnorm + self.epsilon = epsilon + + with tf.device('/CPU:0') : + with tf.variable_scope(self.name): + self.iterations = tf.Variable(0, dtype=tf.int64, name='iters') + + self.ms_dict = {} + self.vs_dict = {} + self.lr_rnds_dict = {} + + def get_weights(self): + return [self.iterations] + list(self.ms_dict.values()) + list(self.vs_dict.values()) + + def initialize_variables(self, trainable_weights, vars_on_cpu=True, lr_dropout_on_cpu=False): + # Initialize here all trainable variables used in training + e = tf.device('/CPU:0') if vars_on_cpu else None + if e: e.__enter__() + with tf.variable_scope(self.name): + ms = { v.name : tf.get_variable ( f'ms_{v.name}'.replace(':','_'), v.shape, dtype=v.dtype, initializer=tf.initializers.constant(0.0), trainable=False) for v in trainable_weights } + vs = { v.name : tf.get_variable ( f'vs_{v.name}'.replace(':','_'), v.shape, dtype=v.dtype, initializer=tf.initializers.constant(0.0), trainable=False) for v in trainable_weights } + self.ms_dict.update (ms) + self.vs_dict.update (vs) + + if self.lr_dropout != 1.0: + e = tf.device('/CPU:0') if lr_dropout_on_cpu else None + if e: e.__enter__() + lr_rnds = [ nn.random_binomial( v.shape, p=self.lr_dropout, dtype=v.dtype) for v in trainable_weights ] + if e: e.__exit__(None, None, None) + self.lr_rnds_dict.update ( { v.name : rnd for v,rnd in zip(trainable_weights,lr_rnds) } ) + if e: e.__exit__(None, None, None) + + def get_update_op(self, grads_vars): + updates = [] + + if self.clipnorm > 0.0: + norm = tf.sqrt( sum([tf.reduce_sum(tf.square(g)) for g,v in grads_vars])) + updates += [ state_ops.assign_add( self.iterations, 1) ] + for i, (g,v) in enumerate(grads_vars): + if self.clipnorm > 0.0: + g = self.tf_clip_norm(g, self.clipnorm, norm) + + ms = self.ms_dict[ v.name ] + vs = self.vs_dict[ v.name ] + + m_t = self.beta_1*ms + (1.0-self.beta_1) * g + v_t = self.beta_2*vs + (1.0-self.beta_2) * tf.square(g-m_t) + + lr = tf.constant(self.lr, g.dtype) + if self.lr_cos != 0: + lr *= (tf.cos( tf.cast(self.iterations, g.dtype) * (2*3.1415926535/ float(self.lr_cos) ) ) + 1.0) / 2.0 + + v_diff = - lr * m_t / (tf.sqrt(v_t) + self.epsilon) + if self.lr_dropout != 1.0: + lr_rnd = self.lr_rnds_dict[v.name] + v_diff *= lr_rnd + new_v = v + v_diff + + updates.append (state_ops.assign(ms, m_t)) + updates.append (state_ops.assign(vs, v_t)) + updates.append (state_ops.assign(v, new_v)) + + return control_flow_ops.group ( *updates, name=self.name+'_updates') +nn.AdaBelief = AdaBelief \ No newline at end of file diff --git a/face_detect/core/leras/optimizers/OptimizerBase.py b/face_detect/core/leras/optimizers/OptimizerBase.py new file mode 100644 index 0000000000000000000000000000000000000000..1bc6309bb61d4e390085f41544362ae40f56e319 --- /dev/null +++ b/face_detect/core/leras/optimizers/OptimizerBase.py @@ -0,0 +1,42 @@ +import copy +from 
face_feature.core.leras import nn +tf = nn.tf + +class OptimizerBase(nn.Saveable): + def __init__(self, name=None): + super().__init__(name=name) + + def tf_clip_norm(self, g, c, n): + """Clip the gradient `g` if the L2 norm `n` exceeds `c`. + # Arguments + g: Tensor, the gradient tensor + c: float >= 0. Gradients will be clipped + when their L2 norm exceeds this value. + n: Tensor, actual norm of `g`. + # Returns + Tensor, the gradient clipped if required. + """ + if c <= 0: # if clipnorm == 0 no need to add ops to the graph + return g + + condition = n >= c + then_expression = tf.scalar_mul(c / n, g) + else_expression = g + + # saving the shape to avoid converting sparse tensor to dense + if isinstance(then_expression, tf.Tensor): + g_shape = copy.copy(then_expression.get_shape()) + elif isinstance(then_expression, tf.IndexedSlices): + g_shape = copy.copy(then_expression.dense_shape) + if condition.dtype != tf.bool: + condition = tf.cast(condition, 'bool') + g = tf.cond(condition, + lambda: then_expression, + lambda: else_expression) + if isinstance(then_expression, tf.Tensor): + g.set_shape(g_shape) + elif isinstance(then_expression, tf.IndexedSlices): + g._dense_shape = g_shape + + return g +nn.OptimizerBase = OptimizerBase diff --git a/face_detect/core/leras/optimizers/RMSprop.py b/face_detect/core/leras/optimizers/RMSprop.py new file mode 100644 index 0000000000000000000000000000000000000000..27d00ac788cde3f108615d8030089caaeeca6795 --- /dev/null +++ b/face_detect/core/leras/optimizers/RMSprop.py @@ -0,0 +1,72 @@ +from tensorflow.python.ops import control_flow_ops, state_ops +from face_feature.core.leras import nn +tf = nn.tf + +class RMSprop(nn.OptimizerBase): + def __init__(self, lr=0.001, rho=0.9, lr_dropout=1.0, epsilon=1e-7, clipnorm=0.0, name=None, **kwargs): + super().__init__(name=name) + + if name is None: + raise ValueError('name must be defined.') + + self.lr_dropout = lr_dropout + self.lr = lr + self.rho = rho + self.epsilon = epsilon + + self.clipnorm = clipnorm + + with tf.device('/CPU:0') : + with tf.variable_scope(self.name): + + self.iterations = tf.Variable(0, dtype=tf.int64, name='iters') + + self.accumulators_dict = {} + self.lr_rnds_dict = {} + + def get_weights(self): + return [self.iterations] + list(self.accumulators_dict.values()) + + def initialize_variables(self, trainable_weights, vars_on_cpu=True, lr_dropout_on_cpu=False): + # Initialize here all trainable variables used in training + e = tf.device('/CPU:0') if vars_on_cpu else None + if e: e.__enter__() + with tf.variable_scope(self.name): + accumulators = { v.name : tf.get_variable ( f'acc_{v.name}'.replace(':','_'), v.shape, dtype=v.dtype, initializer=tf.initializers.constant(0.0), trainable=False) for v in trainable_weights } + self.accumulators_dict.update ( accumulators) + + if self.lr_dropout != 1.0: + e = tf.device('/CPU:0') if lr_dropout_on_cpu else None + if e: e.__enter__() + lr_rnds = [ nn.random_binomial( v.shape, p=self.lr_dropout, dtype=v.dtype) for v in trainable_weights ] + if e: e.__exit__(None, None, None) + self.lr_rnds_dict.update ( { v.name : rnd for v,rnd in zip(trainable_weights,lr_rnds) } ) + if e: e.__exit__(None, None, None) + + def get_update_op(self, grads_vars): + updates = [] + + if self.clipnorm > 0.0: + norm = tf.sqrt( sum([tf.reduce_sum(tf.square(g)) for g,v in grads_vars])) + updates += [ state_ops.assign_add( self.iterations, 1) ] + for i, (g,v) in enumerate(grads_vars): + if self.clipnorm > 0.0: + g = self.tf_clip_norm(g, self.clipnorm, norm) + + a = 
self.accumulators_dict[ v.name ] + + new_a = self.rho * a + (1. - self.rho) * tf.square(g) + + lr = tf.constant(self.lr, g.dtype) + + v_diff = - lr * g / (tf.sqrt(new_a) + self.epsilon) + if self.lr_dropout != 1.0: + lr_rnd = self.lr_rnds_dict[v.name] + v_diff *= lr_rnd + new_v = v + v_diff + + updates.append (state_ops.assign(a, new_a)) + updates.append (state_ops.assign(v, new_v)) + + return control_flow_ops.group ( *updates, name=self.name+'_updates') +nn.RMSprop = RMSprop \ No newline at end of file diff --git a/face_detect/core/leras/optimizers/__init__.py b/face_detect/core/leras/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f8a7e4c1a35c0da9fbe837a772252db51f27869 --- /dev/null +++ b/face_detect/core/leras/optimizers/__init__.py @@ -0,0 +1,3 @@ +from .OptimizerBase import * +from .RMSprop import * +from .AdaBelief import * \ No newline at end of file diff --git a/face_detect/core/mathlib/__init__.py b/face_detect/core/mathlib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7e5fa13325e22b7ffb03af1c7d6f8bc674cfc427 --- /dev/null +++ b/face_detect/core/mathlib/__init__.py @@ -0,0 +1,97 @@ +import math + +import cv2 +import numpy as np +import numpy.linalg as npla + +from .umeyama import umeyama + + +def get_power_of_two(x): + i = 0 + while (1 << i) < x: + i += 1 + return i + +def rotationMatrixToEulerAngles(R) : + sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) + singular = sy < 1e-6 + if not singular : + x = math.atan2(R[2,1] , R[2,2]) + y = math.atan2(-R[2,0], sy) + z = math.atan2(R[1,0], R[0,0]) + else : + x = math.atan2(-R[1,2], R[1,1]) + y = math.atan2(-R[2,0], sy) + z = 0 + return np.array([x, y, z]) + +def polygon_area(x,y): + return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) + +def rotate_point(origin, point, deg): + """ + Rotate a point counterclockwise by a given angle around a given origin. + + The angle should be given in radians. 
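+    (In this implementation the angle parameter is named `deg`, is given in
+    degrees, and is converted to radians internally via rad = deg * math.pi / 180.)
+
+    Illustrative example (assumed values, not taken from the repo's tests):
+    rotate_point((0, 0), (1, 0), 90) returns approximately (0, 1).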
+ """ + ox, oy = origin + px, py = point + + rad = deg * math.pi / 180.0 + qx = ox + math.cos(rad) * (px - ox) - math.sin(rad) * (py - oy) + qy = oy + math.sin(rad) * (px - ox) + math.cos(rad) * (py - oy) + return np.float32([qx, qy]) + +def transform_points(points, mat, invert=False): + if invert: + mat = cv2.invertAffineTransform (mat) + points = np.expand_dims(points, axis=1) + points = cv2.transform(points, mat, points.shape) + points = np.squeeze(points) + return points + + +def transform_mat(mat, res, tx, ty, rotation, scale): + """ + transform mat in local space of res + scale -> translate -> rotate + + tx,ty float + rotation int degrees + scale float + """ + + + lt, rt, lb, ct = transform_points ( np.float32([(0,0),(res,0),(0,res),(res / 2, res/2) ]),mat, True) + + hor_v = (rt-lt).astype(np.float32) + hor_size = npla.norm(hor_v) + hor_v /= hor_size + + ver_v = (lb-lt).astype(np.float32) + ver_size = npla.norm(ver_v) + ver_v /= ver_size + + bt_diag_vec = (rt-ct).astype(np.float32) + half_diag_len = npla.norm(bt_diag_vec) + bt_diag_vec /= half_diag_len + + tb_diag_vec = np.float32( [ -bt_diag_vec[1], bt_diag_vec[0] ] ) + + rt = ct + bt_diag_vec*half_diag_len*scale + lb = ct - bt_diag_vec*half_diag_len*scale + lt = ct - tb_diag_vec*half_diag_len*scale + + rt[0] += tx*hor_size + lb[0] += tx*hor_size + lt[0] += tx*hor_size + rt[1] += ty*ver_size + lb[1] += ty*ver_size + lt[1] += ty*ver_size + + rt = rotate_point(ct, rt, rotation) + lb = rotate_point(ct, lb, rotation) + lt = rotate_point(ct, lt, rotation) + + return cv2.getAffineTransform( np.float32([lt, rt, lb]), np.float32([ [0,0], [res,0], [0,res] ]) ) diff --git a/face_detect/core/mathlib/umeyama.py b/face_detect/core/mathlib/umeyama.py new file mode 100644 index 0000000000000000000000000000000000000000..826a88f1ce5d3112817a367dd8784efa4fa71dc6 --- /dev/null +++ b/face_detect/core/mathlib/umeyama.py @@ -0,0 +1,71 @@ +import numpy as np + +def umeyama(src, dst, estimate_scale): + """Estimate N-D similarity transformation with or without scaling. + Parameters + ---------- + src : (M, N) array + Source coordinates. + dst : (M, N) array + Destination coordinates. + estimate_scale : bool + Whether to estimate scaling factor. + Returns + ------- + T : (N + 1, N + 1) + The homogeneous similarity transformation matrix. The matrix contains + NaN values only if the problem is not well-conditioned. + References + ---------- + .. [1] "Least-squares estimation of transformation parameters between two + point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573 + """ + + num = src.shape[0] + dim = src.shape[1] + + # Compute mean of src and dst. + src_mean = src.mean(axis=0) + dst_mean = dst.mean(axis=0) + + # Subtract mean from src and dst. + src_demean = src - src_mean + dst_demean = dst - dst_mean + + # Eq. (38). + A = np.dot(dst_demean.T, src_demean) / num + + # Eq. (39). + d = np.ones((dim,), dtype=np.double) + if np.linalg.det(A) < 0: + d[dim - 1] = -1 + + T = np.eye(dim + 1, dtype=np.double) + + U, S, V = np.linalg.svd(A) + + # Eq. (40) and (43). + rank = np.linalg.matrix_rank(A) + if rank == 0: + return np.nan * T + elif rank == dim - 1: + if np.linalg.det(U) * np.linalg.det(V) > 0: + T[:dim, :dim] = np.dot(U, V) + else: + s = d[dim - 1] + d[dim - 1] = -1 + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) + d[dim - 1] = s + else: + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) + + if estimate_scale: + # Eq. (41) and (42). 
+ scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d) + else: + scale = 1.0 + + T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T) + T[:dim, :dim] *= scale + + return T diff --git a/face_detect/core/osex.py b/face_detect/core/osex.py new file mode 100644 index 0000000000000000000000000000000000000000..7c53e5b2d6ff71d656069a75f2f4c8c28ff0f3e8 --- /dev/null +++ b/face_detect/core/osex.py @@ -0,0 +1,37 @@ +import os +import sys + +if sys.platform[0:3] == 'win': + from ctypes import windll + from ctypes import wintypes + +def set_process_lowest_prio(): + try: + if sys.platform[0:3] == 'win': + GetCurrentProcess = windll.kernel32.GetCurrentProcess + GetCurrentProcess.restype = wintypes.HANDLE + SetPriorityClass = windll.kernel32.SetPriorityClass + SetPriorityClass.argtypes = (wintypes.HANDLE, wintypes.DWORD) + SetPriorityClass ( GetCurrentProcess(), 0x00000040 ) + elif 'darwin' in sys.platform: + os.nice(10) + elif 'linux' in sys.platform: + os.nice(20) + except: + print("Unable to set lowest process priority") + +def set_process_dpi_aware(): + if sys.platform[0:3] == 'win': + windll.user32.SetProcessDPIAware(True) + +def get_screen_size(): + if sys.platform[0:3] == 'win': + user32 = windll.user32 + return user32.GetSystemMetrics(0), user32.GetSystemMetrics(1) + elif 'darwin' in sys.platform: + pass + elif 'linux' in sys.platform: + pass + + return (1366, 768) + \ No newline at end of file diff --git a/face_detect/core/pathex.py b/face_detect/core/pathex.py new file mode 100644 index 0000000000000000000000000000000000000000..709b763b0778070f30b75a17eddda9de432046c8 --- /dev/null +++ b/face_detect/core/pathex.py @@ -0,0 +1,114 @@ +from pathlib import Path +from os import scandir + +image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + +def write_bytes_safe(p, bytes_data): + """ + writes to .tmp first and then rename to target filename + """ + p_tmp = p.parent / (p.name + '.tmp') + p_tmp.write_bytes(bytes_data) + if p.exists(): + p.unlink() + p_tmp.rename (p) + +def scantree(path): + """Recursively yield DirEntry objects for given directory.""" + for entry in scandir(path): + if entry.is_dir(follow_symlinks=False): + yield from scantree(entry.path) # see below for Python 2.x + else: + yield entry + +def get_image_paths(dir_path, image_extensions=image_extensions, subdirs=False, return_Path_class=False): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + + if subdirs: + gen = scantree(str(dir_path)) + else: + gen = scandir(str(dir_path)) + + for x in list(gen): + if any([x.name.lower().endswith(ext) for ext in image_extensions]): + result.append( x.path if not return_Path_class else Path(x.path) ) + return sorted(result) + +def get_image_unique_filestem_paths(dir_path, verbose_print_func=None): + result = get_image_paths(dir_path) + result_dup = set() + + for f in result[:]: + f_stem = Path(f).stem + if f_stem in result_dup: + result.remove(f) + if verbose_print_func is not None: + verbose_print_func ("Duplicate filenames are not allowed, skipping: %s" % Path(f).name ) + continue + result_dup.add(f_stem) + + return sorted(result) + +def get_paths(dir_path): + dir_path = Path (dir_path) + + if dir_path.exists(): + return [ Path(x) for x in sorted([ x.path for x in list(scandir(str(dir_path))) ]) ] + else: + return [] + +def get_file_paths(dir_path): + dir_path = Path (dir_path) + + if dir_path.exists(): + return [ Path(x) for x in sorted([ x.path for x in list(scandir(str(dir_path))) if x.is_file() ]) ] + else: + return [] + +def 
get_all_dir_names (dir_path): + dir_path = Path (dir_path) + + if dir_path.exists(): + return sorted([ x.name for x in list(scandir(str(dir_path))) if x.is_dir() ]) + else: + return [] + +def get_all_dir_names_startswith (dir_path, startswith): + dir_path = Path (dir_path) + startswith = startswith.lower() + + result = [] + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if x.name.lower().startswith(startswith): + result.append ( x.name[len(startswith):] ) + return sorted(result) + +def get_first_file_by_stem (dir_path, stem, exts=None): + dir_path = Path (dir_path) + stem = stem.lower() + + if dir_path.exists(): + for x in sorted(list(scandir(str(dir_path))), key=lambda x: x.name): + if not x.is_file(): + continue + xp = Path(x.path) + if xp.stem.lower() == stem and (exts is None or xp.suffix.lower() in exts): + return xp + + return None + +def move_all_files (src_dir_path, dst_dir_path): + paths = get_file_paths(src_dir_path) + for p in paths: + p = Path(p) + p.rename ( Path(dst_dir_path) / p.name ) + +def delete_all_files (dir_path): + paths = get_file_paths(dir_path) + for p in paths: + p = Path(p) + p.unlink() diff --git a/face_detect/core/randomex.py b/face_detect/core/randomex.py new file mode 100644 index 0000000000000000000000000000000000000000..9c8dc6359932996f9edf61bbac077d204775e4e6 --- /dev/null +++ b/face_detect/core/randomex.py @@ -0,0 +1,14 @@ +import numpy as np + +def random_normal( size=(1,), trunc_val = 2.5 ): + len = np.array(size).prod() + result = np.empty ( (len,) , dtype=np.float32) + + for i in range (len): + while True: + x = np.random.normal() + if x >= -trunc_val and x <= trunc_val: + break + result[i] = (x / trunc_val) + + return result.reshape ( size ) \ No newline at end of file diff --git a/face_detect/core/stdex.py b/face_detect/core/stdex.py new file mode 100644 index 0000000000000000000000000000000000000000..2f23be99ed0f1a526339e0918550a30a449eabcd --- /dev/null +++ b/face_detect/core/stdex.py @@ -0,0 +1,36 @@ +import os +import sys + +class suppress_stdout_stderr(object): + def __enter__(self): + self.outnull_file = open(os.devnull, 'w') + self.errnull_file = open(os.devnull, 'w') + + self.old_stdout_fileno_undup = sys.stdout.fileno() + self.old_stderr_fileno_undup = sys.stderr.fileno() + + self.old_stdout_fileno = os.dup ( sys.stdout.fileno() ) + self.old_stderr_fileno = os.dup ( sys.stderr.fileno() ) + + self.old_stdout = sys.stdout + self.old_stderr = sys.stderr + + os.dup2 ( self.outnull_file.fileno(), self.old_stdout_fileno_undup ) + os.dup2 ( self.errnull_file.fileno(), self.old_stderr_fileno_undup ) + + sys.stdout = self.outnull_file + sys.stderr = self.errnull_file + return self + + def __exit__(self, *_): + sys.stdout = self.old_stdout + sys.stderr = self.old_stderr + + os.dup2 ( self.old_stdout_fileno, self.old_stdout_fileno_undup ) + os.dup2 ( self.old_stderr_fileno, self.old_stderr_fileno_undup ) + + os.close ( self.old_stdout_fileno ) + os.close ( self.old_stderr_fileno ) + + self.outnull_file.close() + self.errnull_file.close() diff --git a/face_detect/core/structex.py b/face_detect/core/structex.py new file mode 100644 index 0000000000000000000000000000000000000000..cc63559febb2ca1f30f6ac0d23dbc94a947c3b10 --- /dev/null +++ b/face_detect/core/structex.py @@ -0,0 +1,5 @@ +import struct + +def struct_unpack(data, counter, fmt): + fmt_size = struct.calcsize(fmt) + return (counter+fmt_size,) + struct.unpack (fmt, data[counter:counter+fmt_size]) diff --git a/face_detect/face_align_5_landmarks.py 
b/face_detect/face_align_5_landmarks.py new file mode 100644 index 0000000000000000000000000000000000000000..0f8c2a6bfbe95dd13dd4083e6090ce026112f833 --- /dev/null +++ b/face_detect/face_align_5_landmarks.py @@ -0,0 +1,141 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/10 + + + +import numpy as np +import cv2 +from cv2box.utils.math import Normalize +from cv2box import CVImage + +from .scrfd_insightface import SCRFD +from face_detect.face_align_utils import norm_crop, apply_roi_func + +# https://github.com/deepinsight/insightface/tree/master/detection/scrfd +SCRFD_MODEL_PATH = '../pretrain_models/' + +class FaceDetect5Landmarks: + def __init__(self, mode='scrfd_500m', tracking=False): + self.mode = mode + self.tracking = tracking + self.dis_list = [] + self.last_bboxes_ = [] + assert self.mode in ['scrfd', 'scrfd_500m', 'mtcnn'] + self.bboxes = self.kpss = self.image = None + if 'scrfd' in self.mode: + if self.mode == 'scrfd_500m': + scrfd_model_path = SCRFD_MODEL_PATH + 'scrfd_500m_bnkps_shape640x640.onnx' + else: + scrfd_model_path = SCRFD_MODEL_PATH + 'scrfd_10g_bnkps.onnx' + self.det_model_scrfd = SCRFD(scrfd_model_path) + self.det_model_scrfd.prepare(ctx_id=0, input_size=(640, 640)) + + + def get_bboxes(self, image, nms_thresh=0.5, max_num=0, min_bbox_size=None): + """ + Args: + image: RGB image path or Numpy array load by cv2 + nms_thresh: + max_num: + min_bbox_size: + Returns: + """ + self.image = CVImage(image).rgb() + + if self.tracking: + if len(self.last_bboxes_) == 0: + self.bboxes, self.kpss = self.det_model_scrfd.detect(image, thresh=nms_thresh, max_num=1, + metric='default') + self.last_bboxes_ = self.bboxes + # return self.bboxes, self.kpss + else: + self.bboxes, self.kpss = self.det_model_scrfd.detect(image, thresh=nms_thresh, max_num=0, + metric='default') + self.bboxes, self.kpss = self.tracking_filter() + else: + if 'scrfd' in self.mode: + self.bboxes, self.kpss = self.det_model_scrfd.detect(self.image, thresh=nms_thresh, + max_num=max_num, + metric='default') + + return self.bboxes, self.kpss + + def tracking_filter(self): + for i in range(len(self.bboxes)): + self.dis_list.append(np.linalg.norm(Normalize(self.bboxes[i]).np_norm() - Normalize(self.last_bboxes_[0]).np_norm())) + if not self.dis_list: + return [], [] + best_index = np.argmin(np.array(self.dis_list)) + self.dis_list = [] + self.last_bboxes_ = [self.bboxes[best_index]] + return self.last_bboxes_, [self.kpss[best_index]] + + def bboxes_filter(self, min_bbox_size): + min_area = np.power(min_bbox_size, 2) + area_list = (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1]) + min_index = np.where(area_list < min_area) + self.bboxes = np.delete(self.bboxes, min_index, axis=0) + self.kpss = np.delete(self.kpss, min_index, axis=0) + + def get_single_face(self, crop_size, mode='mtcnn_512', apply_roi=False): + """ + Args: + crop_size: + mode: default mtcnn_512 arcface_512 arcface default_95 + Returns: cv2 image + """ + assert mode in ['default', 'mtcnn_512', 'mtcnn_256', 'arcface_512', 'arcface', 'default_95'] + if self.bboxes.shape[0] == 0: + return None, None + det_score = self.bboxes[..., 4] + if self.tracking: + best_index = np.argmax(np.array(self.dis_list)) + kpss = None + if self.kpss is not None: + kpss = self.kpss[best_index] + else: + best_index = np.argmax(det_score) + kpss = None + if self.kpss is not None: + kpss = self.kpss[best_index] + if apply_roi: + roi, roi_box, roi_kpss = apply_roi_func(self.image, self.bboxes[best_index], kpss) + align_img, mat_rev = norm_crop(roi, 
roi_kpss, crop_size, mode=mode) + align_img = cv2.cvtColor(align_img, cv2.COLOR_RGB2BGR) + return align_img, mat_rev, roi_box + else: + align_img, M = norm_crop(self.image, kpss, crop_size, mode=mode) + align_img = cv2.cvtColor(align_img, cv2.COLOR_RGB2BGR) + return align_img, M + + def get_multi_face(self, crop_size, mode='mtcnn_512'): + """ + Args: + crop_size: + mode: default mtcnn_512 arcface_512 arcface + Returns: + """ + if self.bboxes.shape[0] == 0: + return None + align_img_list = [] + M_list = [] + for i in range(self.bboxes.shape[0]): + kps = None + if self.kpss is not None: + kps = self.kpss[i] + align_img, M = norm_crop(self.image, kps, crop_size, mode=mode) + align_img_list.append(align_img) + M_list.append(M) + return align_img_list, M_list + + def draw_face(self): + for i_ in range(self.bboxes.shape[0]): + bbox = self.bboxes[i_] + x1, y1, x2, y2, score = bbox.astype(int) + cv2.rectangle(self.image, (x1, y1), (x2, y2), (255, 0, 0), 2) + if self.kpss is not None: + kps = self.kpss[i_] + for kp in kps: + kp = kp.astype(int) + cv2.circle(self.image, tuple(kp), 1, (0, 0, 255), 2) + CVImage(self.image, image_format='cv2').show() diff --git a/face_detect/face_align_68.py b/face_detect/face_align_68.py new file mode 100644 index 0000000000000000000000000000000000000000..9401a2f096096426a75ad4a62fbb81e30cd824c8 --- /dev/null +++ b/face_detect/face_align_68.py @@ -0,0 +1,409 @@ +import cv2 +import numpy as np +from PIL import Image +from .LandmarksProcessor import get_transform_mat_all +import onnxruntime as ort +def drawLandmark_multiple(img, bbox, landmark): + ''' + Input: + - img: gray or RGB + - bbox: type of BBox + - landmark: reproject landmark of (5L, 2L) + Output: + - img marked with landmark and bbox + ''' + cv2.rectangle(img, (bbox['left'], bbox['top']), (bbox['right'], bbox['bottom']), (0,0,255), 2) + for x, y in landmark: + cv2.circle(img, (int(x), int(y)), 2, (0,255,0), -1) + return img + +def drawLandmark_multiple_list(img, bbox, landmark): + ''' + Input: + - img: gray or RGB + - bbox: type of BBox + - landmark: reproject landmark of (5L, 2L) + Output: + - img marked with landmark and bbox + ''' + cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0,0,255), 2) + for x, y in landmark: + cv2.circle(img, (int(x), int(y)), 2, (0,255,0), -1) + return img +REFERENCE_FACIAL_POINTS = [ + [30.29459953, 51.69630051], + [65.53179932, 51.50139999], + [48.02519989, 71.73660278], + [33.54930115, 92.3655014], + [62.72990036, 92.20410156] +] +landmarks_2D_new = np.array([ + [0.000213256, 0.106454], # 17 + [0.0752622, 0.038915], # 18 + [0.18113, 0.0187482], # 19 + [0.29077, 0.0344891], # 20 + [0.393397, 0.0773906], # 21 + [0.586856, 0.0773906], # 22 + [0.689483, 0.0344891], # 23 + [0.799124, 0.0187482], # 24 + [0.904991, 0.038915], # 25 + [0.98004, 0.106454], # 26 + [0.490127, 0.203352], # 27 + [0.490127, 0.307009], # 28 + [0.490127, 0.409805], # 29 + [0.490127, 0.515625], # 30 + [0.36688, 0.587326], # 31 + [0.426036, 0.609345], # 32 + [0.490127, 0.628106], # 33 + [0.554217, 0.609345], # 34 + [0.613373, 0.587326], # 35 + [0.121737, 0.216423], # 36 + [0.187122, 0.178758], # 37 + [0.265825, 0.179852], # 38 + [0.334606, 0.231733], # 39 + [0.260918, 0.245099], # 40 + [0.182743, 0.244077], # 41 + [0.645647, 0.231733], # 42 + [0.714428, 0.179852], # 43 + [0.793132, 0.178758], # 44 + [0.858516, 0.216423], # 45 + [0.79751, 0.244077], # 46 + [0.719335, 0.245099], # 47 + [0.254149, 0.780233], # 48 + [0.726104, 0.780233], # 54 +], dtype=np.float32) 
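+
+# landmarks_2D_new above is the normalized [0, 1] alignment template assigned to
+# self.refrence in face_alignment_landmark below: 33 of the 68-point landmarks
+# (indices 17-48 and 54, i.e. eyebrows, nose, eyes and the two mouth corners).
+# mesh_33 below appears to select the corresponding 33 points from a 468-point
+# MediaPipe FaceMesh prediction in the lm_type == 468 branch.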
+mesh_33=[70,63,105,66,107,336,296,334,293,300,168,197,5,4,240,99,2,328,460,33,160,158,133,153,144,362,385,387,263,373,380,57,287] + + +def convert98to68(list_info): + points = list_info[0,0:196] + info_68 = [] + for j in range(17): + x = points[j * 2 * 2 + 0] + y = points[j * 2 * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(33, 38): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(42, 47): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(51, 61): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + point_38_x = (float(points[60 * 2 + 0]) + float(points[62 * 2 + 0])) / 2.0 + point_38_y = (float(points[60 * 2 + 1]) + float(points[62 * 2 + 1])) / 2.0 + point_39_x = (float(points[62 * 2 + 0]) + float(points[64 * 2 + 0])) / 2.0 + point_39_y = (float(points[62 * 2 + 1]) + float(points[64 * 2 + 1])) / 2.0 + point_41_x = (float(points[64 * 2 + 0]) + float(points[66 * 2 + 0])) / 2.0 + point_41_y = (float(points[64 * 2 + 1]) + float(points[66 * 2 + 1])) / 2.0 + point_42_x = (float(points[60 * 2 + 0]) + float(points[66 * 2 + 0])) / 2.0 + point_42_y = (float(points[60 * 2 + 1]) + float(points[66 * 2 + 1])) / 2.0 + point_44_x = (float(points[68 * 2 + 0]) + float(points[70 * 2 + 0])) / 2.0 + point_44_y = (float(points[68 * 2 + 1]) + float(points[70 * 2 + 1])) / 2.0 + point_45_x = (float(points[70 * 2 + 0]) + float(points[72 * 2 + 0])) / 2.0 + point_45_y = (float(points[70 * 2 + 1]) + float(points[72 * 2 + 1])) / 2.0 + point_47_x = (float(points[72 * 2 + 0]) + float(points[74 * 2 + 0])) / 2.0 + point_47_y = (float(points[72 * 2 + 1]) + float(points[74 * 2 + 1])) / 2.0 + point_48_x = (float(points[68 * 2 + 0]) + float(points[74 * 2 + 0])) / 2.0 + point_48_y = (float(points[68 * 2 + 1]) + float(points[74 * 2 + 1])) / 2.0 + info_68.append((point_38_x)) + info_68.append((point_38_y)) + info_68.append((point_39_x)) + info_68.append((point_39_y)) + info_68.append(points[64 * 2 + 0]) + info_68.append(points[64 * 2 + 1]) + info_68.append((point_41_x)) + info_68.append((point_41_y)) + info_68.append((point_42_x)) + info_68.append((point_42_y)) + info_68.append(points[68 * 2 + 0]) + info_68.append(points[68 * 2 + 1]) + info_68.append((point_44_x)) + info_68.append((point_44_y)) + info_68.append((point_45_x)) + info_68.append((point_45_y)) + info_68.append(points[72 * 2 + 0]) + info_68.append(points[72 * 2 + 1]) + info_68.append((point_47_x)) + info_68.append((point_47_y)) + info_68.append((point_48_x)) + info_68.append((point_48_y)) + for j in range(76, 96): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(len(list_info[196:])): + info_68.append(list_info[196 + j]) + return np.array(info_68) +def crop(image, center, scale, resolution=256.0): + ul = transform([1, 1], center, scale, resolution).astype(np.int) + br = transform([resolution, resolution], center, scale, resolution).astype(np.int) + + if image.ndim > 2: + newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32) + newImg = np.zeros(newDim, dtype=np.uint8) + else: + newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) + newImg = np.zeros(newDim, dtype=np.uint8) + ht = image.shape[0] + wd = image.shape[1] + newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) + newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) + oldX = np.array([max(1, ul[0] + 1), 
min(br[0], wd)], dtype=np.int32) + oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) + newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] + + newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR) + return newImg + +def get_pts_from_predict(a, center, scale): + a_ch, a_h, a_w = a.shape + + b = a.reshape((a_ch, a_h * a_w)) + c = b.argmax(1).reshape((a_ch, 1)).repeat(2, axis=1).astype(np.float) + c[:, 0] %= a_w + c[:, 1] = np.apply_along_axis(lambda x: np.floor(x / a_w), 0, c[:, 1]) + + for i in range(a_ch): + pX, pY = int(c[i, 0]), int(c[i, 1]) + if pX > 0 and pX < 63 and pY > 0 and pY < 63: + diff = np.array([a[i, pY, pX + 1] - a[i, pY, pX - 1], a[i, pY + 1, pX] - a[i, pY - 1, pX]]) + c[i] += np.sign(diff) * 0.25 + + c += 0.5 + + return np.array([transform(c[i], center, scale, a_w) for i in range(a_ch)]) +def transform(point, center, scale, resolution): + pt = np.array([point[0], point[1], 1.0]) + h = 200.0 * scale + m = np.eye(3) + m[0, 0] = resolution / h + m[1, 1] = resolution / h + m[0, 2] = resolution * (-center[0] / h + 0.5) + m[1, 2] = resolution * (-center[1] / h + 0.5) + m = np.linalg.inv(m) + return np.matmul(m, pt)[0:2] + +class pfpld(): + def __init__(self,cpu=True): + onnx_path = "./pretrain_models/pfpld_robust_sim_bs1_8003.onnx" + try: + self.ort_session = ort.InferenceSession(onnx_path) + except Exception as e: + raise e("load onnx failed") + # e.g. ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'] ordered by priority + # self.ort_session.get_providers() + if cpu: + self.ort_session.set_providers(['CPUExecutionProvider']) + else: + self.ort_session.set_providers(['CUDAExecutionProvider']) + self.input_name = self.ort_session.get_inputs()[0].name + + def forward(self, input): + size = input.shape + ort_inputs = {self.input_name: (cv2.resize(input, (112, 112)) / 255).astype(np.float32).transpose(2, 0, 1)[None]} + pred = self.ort_session.run(None, ort_inputs) + pred = convert98to68(pred[1]) + return pred.reshape(-1, 68, 2) * size[:2][::-1] + +class face_alignment_landmark(): + def __init__(self,lm_type=68,method='affine'): + self.lm_type=lm_type + self.method=method + self.frame_index = 0 + + if lm_type==68: + #self.fan = fan() + self.fan = pfpld(cpu=False) + self.refrence = landmarks_2D_new + + else: + raise Exception('landmark shape error') + + if method=='similarity': + self.refrence=self.refrence + + def forward(self,img, boxes,kpss,limit=None, min_face_size=64.0, crop_size=(112, 112), apply_roi=False, multi_sample=True): + if limit: + boxes = boxes[:limit] + # cv2.imshow('img', cv2.resize(img, (512, 512))) + # if cv2.waitKey(1) == ord('q'): # 按Q退出 + # pass + + faces = [] + Ms = [] + rois = [] + masks = [] + for i,box in enumerate(boxes): + if apply_roi: + box = np.round(np.array(boxes[i])).astype(int)[:4] + roi_pad_w = int(0.6 * max([box[2]-box[0],box[3]-box[1]])) + roi_pad_h= int(0.4 * max([box[2]-box[0],box[3]-box[1]])) + + roi_box = np.array([ + max(0, box[0] - roi_pad_w), + max(0, box[1] - roi_pad_h), + min(img.shape[1], box[2] + roi_pad_w), + min(img.shape[0], box[3] + roi_pad_h) + ]) + rois.append(roi_box) + roi = img[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]].copy() + # cv2.imwrite("data/test/roi_{}_{}.jpg".format(self.frame_index, i), roi) + self.frame_index += 1 + # mrow1 = roi_box[1] + # mcol1 = roi_box[0] + # roi_facial5points = facial5points.copy() + + # cv2.imshow('roi', roi) + # if cv2.waitKey(1) == ord('q'): # 
按Q退出 + # break + mrow1 = roi_box[1] + mcol1 = roi_box[0] + facial5points=kpss[i] + facial5points[:, 0] -= mcol1 + facial5points[:, 1] -= mrow1 + if multi_sample : + roi_facial5points_list=[] + move_list=[[0,0],[-1,-1],[1,1],[-1,1],[1,-1]] + distance = int(0.01 * max([box[2]-box[0],box[3]-box[1]])) + x1, y1, x2, y2 = box + w = x2 - x1 + 1 + h = y2 - y1 + 1 + size_w = int(max([w, h]) * 0.9) + size_h = int(max([w, h]) * 0.9) + height, width = img.shape[:2] + for i in range(1): + move=move_list[i] + cx = (x1 + x2) // 2+move[1]*distance + cy = (y1 + y2) // 2+move[0]*distance + x1 = cx - size_w // 2 + x2 = x1 + size_w + y1 = cy - int(size_h * 0.4) + y2 = y1 + size_h + left = 0 + top = 0 + bottom = 0 + right = 0 + if x1 < 0: + left = -x1 + if y1 < 0: + top = -y1 + if x2 >= width: + right = x2 - width + if y2 >= height: + bottom = y2 - height + + x1 = max(0, x1) + y1 = max(0, y1) + + x2 = min(width, x2) + y2 = min(height, y2) + roi_2 = img[y1:y2, x1:x2] + roi_2 = cv2.copyMakeBorder(roi_2, top, bottom, left, right, cv2.BORDER_CONSTANT, 0) + roi_box_2=[x1,y1,x2,y2] + # roi_pad_2 = int(0.2 * max([box[2] - box[0], box[3] - box[1]])) + # roi_box_2 = np.array([ + # max(0, box[0] - roi_pad_2+move[0]*distance), + # max(0, box[1] - roi_pad_2+move[1]*distance), + # min(img.shape[1], box[2] + roi_pad_2+move[0]*distance), + # min(img.shape[0], box[3] + roi_pad_2+move[1]*distance) + # ]) + # roi_2 = img[roi_box_2[1]:roi_box_2[3], roi_box_2[0]:roi_box_2[2]].copy() + + + if self.lm_type==68: + landmarks=self.fan.forward(roi_2) + + # print("landmarks:", landmarks.shape) + # image = img.copy() + # point_size = 5 + # point_color = (0, 0, 255) # BGR + # thickness = -1 + # for index in range(landmarks.shape[1]): + # x = round(landmarks[0][index][0]) + x1 + # y = round(landmarks[0][index][1]) + y1 + # point = (x, y) + # cv2.circle(image, point, point_size, point_color, thickness) + # cv2.imwrite('data/source/source_lm_066_68.png', image) + + + + bbox={'left':0,'top':0,'bottom':roi_2.shape[1],'right':roi_2.shape[0]} + # a = drawLandmark_multiple(roi_2,bbox ,landmarks[0] ) + # cv2.imshow('landmark',a,) + # cv2.waitKey(1) + if len(landmarks) >= 1: + roi_facial5points_tmp = landmarks[0] + roi_facial5points_tmp[:, 0] -= roi_box[0] - roi_box_2[0] + left + roi_facial5points_tmp[:, 1] -= roi_box[1] - roi_box_2[1] + top + roi_facial5points_list.append(roi_facial5points_tmp) + elif self.lm_type==468: + results = self.mp_m_landmark.process(roi) + if results.multi_face_landmarks is None: + landmarks=[] + else: + landmarks = np.array( + [(lm.x, lm.y, lm.z) for lm in results.multi_face_landmarks[0].landmark])[:, :2] + landmarks = (landmarks * np.array([roi.shape[1], roi.shape[0]])).astype(np.int)[None,...] 
+ + if len(landmarks) >= 1: + roi_facial5points_tmp = landmarks[0] + roi_facial5points_list.append(roi_facial5points_tmp) + + if len(roi_facial5points_list)>0: + # cv2.imshow('succsess', cv2.resize(roi_2,(512,512))) + # if cv2.waitKey(1) == ord('q'): # 按Q退出 + # break + roi_facial5points=np.mean(roi_facial5points_list,axis=0) + if self.lm_type==68: + roi_facial5points = np.concatenate([roi_facial5points[17:49], roi_facial5points[54:55]]) + elif self.lm_type == 468: + roi_facial5points = roi_facial5points[mesh_33] + + if self.method=='affine': + if self.lm_type==468: + mat = get_transform_mat_all(roi_facial5points, self.refrence, output_size=crop_size[0], + scale=1.04,gcx=-0.02,gcy=0.25) + elif self.lm_type==68: + mat = get_transform_mat_all(roi_facial5points, self.refrence, output_size=crop_size[0], + scale=1.06,gcx=-0.02,gcy=0.21) # 1.06 0.9 + warped_face=cv2.warpAffine(roi, mat, crop_size) + M = cv2.invertAffineTransform(mat) + + + # cv2.imshow('warped_face', warped_face) + # if cv2.waitKey(1) == ord('q'): # 按Q退出 + # break + # cv2.imshow('warped_face_5', warped_face_5) + # if cv2.waitKey(1) == ord('q'): # 按Q退出 + # break + + face = Image.fromarray(warped_face) + faces.append(face) + Ms.append(M) + # mask = np.full(crop_size, 255, dtype=float) + # mask = cv2.warpAffine(mask, M, roi.shape[:2][::-1]) + # mask[mask > 20] = 255 + mask= np.array([0,1]) + masks.append(mask) + else: + # cv2.imshow('failure', cv2.resize(roi,(512,512))) + # if cv2.waitKey(1) == ord('q'): # 按Q退出 + # break + pass + + + + if apply_roi: + return rois, faces, Ms, masks + else: + return boxes, faces, Ms diff --git a/face_detect/face_align_utils.py b/face_detect/face_align_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..86781399450aa5c2fa5ef9d27c16a08f249e70f2 --- /dev/null +++ b/face_detect/face_align_utils.py @@ -0,0 +1,146 @@ +import cv2 +import numpy as np +from skimage import transform as trans + +src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007], + [51.157, 89.050], [57.025, 89.702]], + dtype=np.float32) +# <--left +src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111], + [45.177, 86.190], [64.246, 86.758]], + dtype=np.float32) + +# ---frontal +src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493], + [42.463, 87.010], [69.537, 87.010]], + dtype=np.float32) + +# -->right +src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111], + [48.167, 86.758], [67.236, 86.190]], + dtype=np.float32) + +# -->right profile +src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007], + [55.388, 89.702], [61.257, 89.050]], + dtype=np.float32) + +multi_src = np.array([src1, src2, src3, src4, src5]) +multi_src_map = {112: multi_src, 224: multi_src * 2, 512: multi_src * (512 / 112)} + +arcface_src = np.array( + [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], + [41.5493, 92.3655], [70.7299, 92.2041]], + dtype=np.float32) + +# mtcnn_src = [ +# [30.29459953, 51.69630051], [65.53179932, 51.50139999], [48.02519989, 71.73660278], +# [33.54930115, 92.3655014], [62.72990036, 92.20410156] +# ] +# tmp_crop_size = np.array((96, 112)) +# size_diff = max(tmp_crop_size) - tmp_crop_size +# mtcnn_src += size_diff / 2 +# ref_pts = np.float32(mtcnn_src) +# ref_pts = (ref_pts - 112 / 2) * 0.85 + 112 / 2 +# ref_pts *= 512 / 112. 
+# mtcnn_src_512 = ref_pts +# print(mtcnn_src_512) + +mtcnn_512 = [[187.20187, 239.27705], + [324.1236, 238.51973], + [256.09793, 317.14795], + [199.84871, 397.30597], + [313.2362, 396.6788]] + +mtcnn_256 = np.array(mtcnn_512) * 0.5 + +arcface_src_512 = arcface_src * np.array([512 / 112, 512 / 112]) +arcface_src = np.expand_dims(arcface_src, axis=0) + + +def get_src_modify(srcs, arcface_src): + srcs += ((arcface_src[2] - srcs[2][2]) * np.array([1, 1.8]))[None] + return srcs + + +# lmk is prediction; src is template +def estimate_norm(lmk, image_size=112, mode='arcface'): + assert lmk.shape == (5, 2) + tform = trans.SimilarityTransform() + lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1) + min_M = [] + min_index = [] + min_error = float('inf') + if mode == 'arcface': + assert image_size == 112 + src = arcface_src + elif mode == 'arcface_512': + src = np.expand_dims(arcface_src_512, axis=0) + elif mode == 'mtcnn_512': + src = np.expand_dims(mtcnn_512, axis=0) + elif mode == 'mtcnn_256': + src = np.expand_dims(mtcnn_256, axis=0) + elif mode == 'default_95': + src = get_src_modify(multi_src, arcface_src[0]) + src_map = {112: src.copy(), 224: src.copy() * 2, 256: src.copy() * 256 / 112 * 0.95, + 512: src.copy() * (512 / 112) * 0.95} + src = src_map[image_size] + else: + src = multi_src_map[image_size] + for i in np.arange(src.shape[0]): + tform.estimate(lmk, src[i]) + M = tform.params[0:2, :] + results = np.dot(M, lmk_tran.T) + results = results.T + error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1))) + # print(error) + if error < min_error: + min_error = error + min_M = M + min_index = i + return min_M, min_index + + +def norm_crop(img, landmark, crop_size=112, mode='arcface'): + mat, pose_index = estimate_norm(landmark, crop_size, mode) + warped = cv2.warpAffine(img, mat, (crop_size, crop_size), borderMode=cv2.BORDER_REPLICATE) + + mat_rev = cv2.invertAffineTransform(mat) + + # # inverse the Affine transformation matrix + # mat_rev = np.zeros([2, 3]) + # div1 = mat[0][0] * mat[1][1] - mat[0][1] * mat[1][0] + # mat_rev[0][0] = mat[1][1] / div1 + # mat_rev[0][1] = -mat[0][1] / div1 + # mat_rev[0][2] = -(mat[0][2] * mat[1][1] - mat[0][1] * mat[1][2]) / div1 + # div2 = mat[0][1] * mat[1][0] - mat[0][0] * mat[1][1] + # mat_rev[1][0] = mat[1][0] / div2 + # mat_rev[1][1] = -mat[0][0] / div2 + # mat_rev[1][2] = -(mat[0][2] * mat[1][0] - mat[0][0] * mat[1][2]) / div2 + + return warped, mat_rev + + +def apply_roi_func(img, box, facial5points): + box = np.round(np.array(box)).astype(int)[:4] + + # roi_pad = int(0.5 * max([box[2] - box[0], box[3] - box[1]])) + roi_pad = 0 + roi_box = np.array([ + max(0, box[0] - roi_pad), + max(0, box[1] - roi_pad), + min(img.shape[1], box[2] + roi_pad), + min(img.shape[0], box[3] + roi_pad) + ]) + + roi = img[roi_box[1]:roi_box[3], roi_box[0]:roi_box[2]].copy() + + mrow1 = roi_box[1] + mcol1 = roi_box[0] + + roi_facial5points = facial5points.copy() + + roi_facial5points[:, 0] -= mcol1 + roi_facial5points[:, 1] -= mrow1 + + return roi, roi_box, roi_facial5points diff --git a/face_detect/face_detect.py b/face_detect/face_detect.py new file mode 100644 index 0000000000000000000000000000000000000000..57d9bc5ed5802950fd1c011fc103bdb621ab164e --- /dev/null +++ b/face_detect/face_detect.py @@ -0,0 +1,87 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/10 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +import numpy as np +import cv2 +from .scrfd_insightface import SCRFD +import os + +def np_norm(x): + return (x - np.average(x)) / np.std(x) + 
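+# np_norm standardizes a vector to zero mean and unit standard deviation; tracking_filter
+# below uses it to compare eye-distance-scaled bounding boxes against the previous (or
+# initial) tracked box. Illustrative sketch (assumed values, not from this repo):
+#   np_norm(np.array([1.0, 2.0, 3.0]))  ->  array([-1.2247, 0., 1.2247])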
+SCRFD_MODEL_PATH = 'pretrain_models/' + + +class FaceDetect: + def __init__(self, mode='scrfd_500m', tracking_thres=0.15): + self.tracking_thres = tracking_thres + self.last_bboxes_ = [] + self.dis_list = [] + self.bboxes = self.kpss = self.image = None + if 'scrfd' in mode: + scrfd_model_path = SCRFD_MODEL_PATH + 'scrfd_500m_bnkps_shape640x640.onnx' + self.det_model = SCRFD(scrfd_model_path) + self.det_model.prepare(ctx_id=0, input_size=(640, 640)) + elif mode == 'mtcnn': + pass + + def get_bboxes(self, image, nms_thresh=0.5, max_num=0, tracking_init_bbox=None): + if type(image) == str: + image = cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB) + elif type(image) == np.ndarray: + # print('Got np array, assert its cv2 output.') + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # tracking logic + if tracking_init_bbox is None: + self.last_bboxes_ = None + self.bboxes, self.kpss = self.det_model.detect(image, thresh=nms_thresh, max_num=max_num, + metric='default') + return True, self.bboxes, self.kpss + # return self.bboxes, self.kpss + else: + self.bboxes, self.kpss = self.det_model.detect(image, thresh=nms_thresh, max_num=max_num, + metric='default') + if not self.last_bboxes_: + return self.tracking_filter(tracking_init_bbox) + else: + return self.tracking_filter(self.last_bboxes_[0]) + + def tracking_filter(self, tracking_init_bbox): + self.dis_list = [] + for i in range(len(self.bboxes)): + eye_dis = np.linalg.norm(self.kpss[0][0] - self.kpss[0][1]) + self.dis_list.append( + np.linalg.norm(np_norm(self.bboxes[i] / eye_dis) - np_norm(tracking_init_bbox / eye_dis))) + # print(self.dis_list) + if not self.dis_list or np.min(np.array(self.dis_list)) > self.tracking_thres: + # print('ok',np.min(np.array(self.dis_list)) ) + self.last_bboxes_ = None + return False, [], [] + # print(np.min(np.array(self.dis_list))) + best_index = np.argmin(np.array(self.dis_list)) + + self.last_bboxes_ = [self.bboxes[best_index]] + return True, self.last_bboxes_, [self.kpss[best_index]] + +# if __name__ == '__main__': +# +# fd = FaceDetect() +# img_path = 'test_img/fake.jpeg' +# bboxes, kpss = fd.get_bboxes(img_path) +# +# img = cv2.imread(img_path) +# +# for i in range(bboxes.shape[0]): +# bbox = bboxes[i] +# x1, y1, x2, y2, score = bbox.astype(int) +# cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2) +# if kpss is not None: +# kps = kpss[i] +# for kp in kps: +# kp = kp.astype(int) +# cv2.circle(img, tuple(kp), 1, (0, 0, 255), 2) +# filename = img_path.split('/')[-1] +# print('output:', filename) +# cv2.imwrite('./%s' % filename, img) diff --git a/face_detect/scrfd_insightface/__init__.py b/face_detect/scrfd_insightface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1b9dbd206e4408d04b0acb8aaf68e37dee0a05bf --- /dev/null +++ b/face_detect/scrfd_insightface/__init__.py @@ -0,0 +1,5 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/10 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +from .scrfd import SCRFD \ No newline at end of file diff --git a/face_detect/scrfd_insightface/scrfd.py b/face_detect/scrfd_insightface/scrfd.py new file mode 100644 index 0000000000000000000000000000000000000000..a08fc7124cc983e3a43c557b2a59f07cbb961bf1 --- /dev/null +++ b/face_detect/scrfd_insightface/scrfd.py @@ -0,0 +1,284 @@ +import numpy as np +import os.path as osp +import cv2 +import onnxruntime # gpu +import os + +def softmax(z): + assert len(z.shape) == 2 + s = np.max(z, axis=1) + s = s[:, np.newaxis] # necessary step to do broadcasting + e_x = np.exp(z - s) + div 
= np.sum(e_x, axis=1) + div = div[:, np.newaxis] # dito + return e_x / div + + +def distance2bbox(points, distance, max_shape=None): + """Decode distance prediction to bounding box. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + distance (Tensor): Distance from the given point to 4 + boundaries (left, top, right, bottom). + max_shape (tuple): Shape of the image. + + Returns: + Tensor: Decoded bboxes. + """ + x1 = points[:, 0] - distance[:, 0] + y1 = points[:, 1] - distance[:, 1] + x2 = points[:, 0] + distance[:, 2] + y2 = points[:, 1] + distance[:, 3] + if max_shape is not None: + x1 = x1.clamp(min=0, max=max_shape[1]) + y1 = y1.clamp(min=0, max=max_shape[0]) + x2 = x2.clamp(min=0, max=max_shape[1]) + y2 = y2.clamp(min=0, max=max_shape[0]) + return np.stack([x1, y1, x2, y2], axis=-1) + + +def distance2kps(points, distance, max_shape=None): + """Decode distance prediction to bounding box. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + distance (Tensor): Distance from the given point to 4 + boundaries (left, top, right, bottom). + max_shape (tuple): Shape of the image. + + Returns: + Tensor: Decoded bboxes. + """ + preds = [] + for i in range(0, distance.shape[1], 2): + px = points[:, i % 2] + distance[:, i] + py = points[:, i % 2 + 1] + distance[:, i + 1] + if max_shape is not None: + px = px.clamp(min=0, max=max_shape[1]) + py = py.clamp(min=0, max=max_shape[0]) + preds.append(px) + preds.append(py) + return np.stack(preds, axis=-1) + + +class SCRFD: + def __init__(self, model_file=None, session=None): + + self.model_file = model_file + self.session = session + self.taskname = 'detection' + self.batched = False + if self.session is None: + # CPUExecutionProvider CUDAExecutionProvider + # self.session = onnxruntime.InferenceSession(self.model_file, None, providers=["CPUExecutionProvider"]) + self.session = onnxruntime.InferenceSession(self.model_file, None, providers=["CUDAExecutionProvider"]) + self.center_cache = {} + self.nms_thresh = 0.4 + self._init_vars() + + def _init_vars(self): + input_cfg = self.session.get_inputs()[0] + input_shape = input_cfg.shape + if isinstance(input_shape[2], str): + self.input_size = None + else: + self.input_size = tuple(input_shape[2:4][::-1]) + input_name = input_cfg.name + outputs = self.session.get_outputs() + if len(outputs[0].shape) == 3: + self.batched = True + output_names = [] + for o in outputs: + output_names.append(o.name) + self.input_name = input_name + self.output_names = output_names + self.use_kps = False + self._num_anchors = 1 + if len(outputs) == 6: + self.fmc = 3 + self._feat_stride_fpn = [8, 16, 32] + self._num_anchors = 2 + elif len(outputs) == 9: + self.fmc = 3 + self._feat_stride_fpn = [8, 16, 32] + self._num_anchors = 2 + self.use_kps = True + elif len(outputs) == 10: + self.fmc = 5 + self._feat_stride_fpn = [8, 16, 32, 64, 128] + self._num_anchors = 1 + elif len(outputs) == 15: + self.fmc = 5 + self._feat_stride_fpn = [8, 16, 32, 64, 128] + self._num_anchors = 1 + self.use_kps = True + + def prepare(self, ctx_id, **kwargs): + if ctx_id < 0: + self.session.set_providers(['CPUExecutionProvider']) + nms_thresh = kwargs.get('nms_thresh', None) + if nms_thresh is not None: + self.nms_thresh = nms_thresh + input_size = kwargs.get('input_size', None) + if input_size is not None: + if self.input_size is not None: + print('warning: det_size is already set in scrfd model, ignore') + else: + self.input_size = input_size + + def forward(self, img, thresh): + scores_list = [] + bboxes_list = [] + kpss_list = [] + input_size = 
tuple(img.shape[0:2][::-1]) + blob = cv2.dnn.blobFromImage(img, 1.0 / 128, input_size, (127.5, 127.5, 127.5), swapRB=False) + net_outs = self.session.run(self.output_names, {self.input_name: blob}) + + input_height = blob.shape[2] + input_width = blob.shape[3] + fmc = self.fmc + for idx, stride in enumerate(self._feat_stride_fpn): + # If model support batch dim, take first output + if self.batched: + scores = net_outs[idx][0] + bbox_preds = net_outs[idx + fmc][0] + bbox_preds = bbox_preds * stride + if self.use_kps: + kps_preds = net_outs[idx + fmc * 2][0] * stride + # If model doesn't support batching take output as is + else: + scores = net_outs[idx] + bbox_preds = net_outs[idx + fmc] + bbox_preds = bbox_preds * stride + if self.use_kps: + kps_preds = net_outs[idx + fmc * 2] * stride + + height = input_height // stride + width = input_width // stride + K = height * width + key = (height, width, stride) + if key in self.center_cache: + anchor_centers = self.center_cache[key] + else: + # solution-1, c style: + # anchor_centers = np.zeros( (height, width, 2), dtype=np.float32 ) + # for i in range(height): + # anchor_centers[i, :, 1] = i + # for i in range(width): + # anchor_centers[:, i, 0] = i + + # solution-2: + # ax = np.arange(width, dtype=np.float32) + # ay = np.arange(height, dtype=np.float32) + # xv, yv = np.meshgrid(np.arange(width), np.arange(height)) + # anchor_centers = np.stack([xv, yv], axis=-1).astype(np.float32) + + # solution-3: + anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32) + # print(anchor_centers.shape) + + anchor_centers = (anchor_centers * stride).reshape((-1, 2)) + if self._num_anchors > 1: + anchor_centers = np.stack([anchor_centers] * self._num_anchors, axis=1).reshape((-1, 2)) + if len(self.center_cache) < 100: + self.center_cache[key] = anchor_centers + + pos_inds = np.where(scores >= thresh)[0] + bboxes = distance2bbox(anchor_centers, bbox_preds) + pos_scores = scores[pos_inds] + pos_bboxes = bboxes[pos_inds] + scores_list.append(pos_scores) + bboxes_list.append(pos_bboxes) + if self.use_kps: + kpss = distance2kps(anchor_centers, kps_preds) + # kpss = kps_preds + kpss = kpss.reshape((kpss.shape[0], -1, 2)) + pos_kpss = kpss[pos_inds] + kpss_list.append(pos_kpss) + return scores_list, bboxes_list, kpss_list + + def detect(self, img, thresh=0.5, input_size=None, max_num=0, metric='default'): + assert input_size is not None or self.input_size is not None + input_size = self.input_size if input_size is None else input_size + + im_ratio = float(img.shape[0]) / img.shape[1] + model_ratio = float(input_size[1]) / input_size[0] + if im_ratio > model_ratio: + new_height = input_size[1] + new_width = int(new_height / im_ratio) + else: + new_width = input_size[0] + new_height = int(new_width * im_ratio) + det_scale = float(new_height) / img.shape[0] + resized_img = cv2.resize(img, (new_width, new_height)) + det_img = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8) + det_img[:new_height, :new_width, :] = resized_img + + scores_list, bboxes_list, kpss_list = self.forward(det_img, thresh) + + scores = np.vstack(scores_list) + scores_ravel = scores.ravel() + order = scores_ravel.argsort()[::-1] + bboxes = np.vstack(bboxes_list) / det_scale + if self.use_kps: + kpss = np.vstack(kpss_list) / det_scale + pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False) + pre_det = pre_det[order, :] + keep = self.nms(pre_det) + det = pre_det[keep, :] + if self.use_kps: + kpss = kpss[order, :, :] + kpss = kpss[keep, :, :] + 
else: + kpss = None + if max_num > 0 and det.shape[0] > max_num: + area = (det[:, 2] - det[:, 0]) * (det[:, 3] - + det[:, 1]) + img_center = img.shape[0] // 2, img.shape[1] // 2 + offsets = np.vstack([ + (det[:, 0] + det[:, 2]) / 2 - img_center[1], + (det[:, 1] + det[:, 3]) / 2 - img_center[0] + ]) + offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) + if metric == 'max': + values = area + else: + values = area - offset_dist_squared * 2.0 # some extra weight on the centering + bindex = np.argsort( + values)[::-1] # some extra weight on the centering + bindex = bindex[0:max_num] + det = det[bindex, :] + if kpss is not None: + kpss = kpss[bindex, :] + return det, kpss + + def nms(self, dets): + thresh = self.nms_thresh + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep diff --git a/face_feature/.idea/.gitignore b/face_feature/.idea/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..26d33521af10bcc7fd8cea344038eaaeb78d0ef5 --- /dev/null +++ b/face_feature/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/face_feature/.idea/Facefeature_mini.iml b/face_feature/.idea/Facefeature_mini.iml new file mode 100644 index 0000000000000000000000000000000000000000..8a05c6ed5f0c89c2998d9aee8978f53136f7649a --- /dev/null +++ b/face_feature/.idea/Facefeature_mini.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/face_feature/.idea/inspectionProfiles/Project_Default.xml b/face_feature/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000000000000000000000000000000000000..eb4fdeef28fe87ebcd85cb913d6a2555120aa9b2 --- /dev/null +++ b/face_feature/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,35 @@ + + + + \ No newline at end of file diff --git a/face_feature/.idea/inspectionProfiles/profiles_settings.xml b/face_feature/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99 --- /dev/null +++ b/face_feature/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/face_feature/.idea/modules.xml b/face_feature/.idea/modules.xml new file mode 100644 index 0000000000000000000000000000000000000000..eaf352b4949c2f43b600c46727a77cdb59c53815 --- /dev/null +++ b/face_feature/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/face_feature/.idea/vcs.xml b/face_feature/.idea/vcs.xml new file mode 100644 index 0000000000000000000000000000000000000000..94a25f7f4cb416c083d265558da75d457237d671 --- /dev/null +++ b/face_feature/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/face_feature/face_lib/__init__.py b/face_feature/face_lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/face_feature/face_lib/face_detect_and_align/__init__.py b/face_feature/face_lib/face_detect_and_align/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8e58ddd298d9a9776e3a74822b57c500609155 --- /dev/null +++ b/face_feature/face_lib/face_detect_and_align/__init__.py @@ -0,0 +1,3 @@ +from .face_align_5_landmarks import FaceDetect5Landmarks +from .face_align_utils import estimate_norm + diff --git a/face_feature/face_lib/face_detect_and_align/face_align_5_landmarks.py b/face_feature/face_lib/face_detect_and_align/face_align_5_landmarks.py new file mode 100644 index 0000000000000000000000000000000000000000..7f967ba06e8c092aa9d9e72cc4269cfe21e7952e --- /dev/null +++ b/face_feature/face_lib/face_detect_and_align/face_align_5_landmarks.py @@ -0,0 +1,151 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/10 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +from PIL import Image +import numpy as np +import cv2 +from cv2box.utils.math import Normalize +from cv2box import CVImage + +from .scrfd_insightface import SCRFD +# from .mtcnn_pytorch import MTCNN +from face_feature.face_lib.face_detect_and_align.face_align_utils import norm_crop + +# https://github.com/deepinsight/insightface/tree/master/detection/scrfd +SCRFD_MODEL_PATH = 'pretrain_models/' + + +# https://github.com/taotaonice/FaceShifter/blob/master/face_modules/mtcnn.py +# & https://github.com/TropComplique/mtcnn-pytorch +# MTCNN_MODEL_PATH = 'pretrain_models/face_lib/face_detect/mtcnn_weights/' + + +class FaceDetect5Landmarks: + def __init__(self, mode='scrfd_500m', tracking=False): + self.mode = mode + self.tracking = tracking + self.dis_list = [] + self.last_bboxes_ = [] + assert self.mode in ['scrfd', 'scrfd_500m', 'mtcnn'] + self.bboxes = self.kpss = self.image = None + if 'scrfd' in self.mode: + if self.mode == 'scrfd_500m': + scrfd_model_path = SCRFD_MODEL_PATH + 'scrfd_500m_bnkps_shape640x640.onnx' + else: + scrfd_model_path = SCRFD_MODEL_PATH + 'scrfd_10g_bnkps.onnx' + self.det_model_scrfd = SCRFD(scrfd_model_path) + self.det_model_scrfd.prepare(ctx_id=0, input_size=(640, 640)) + # elif self.mode == 'mtcnn': + # assert not tracking + # self.det_model_mtcnn = MTCNN(model_dir=MTCNN_MODEL_PATH) + + def get_bboxes(self, image, nms_thresh=0.5, max_num=0, min_bbox_size=None): + """ + Args: + image: RGB image path or Numpy array load by cv2 + nms_thresh: + max_num: + min_bbox_size: + Returns: + """ + self.image = CVImage(image).rgb() + + if self.tracking: + if len(self.last_bboxes_) == 0: + self.bboxes, self.kpss = self.det_model_scrfd.detect_faces(image, thresh=nms_thresh, max_num=1, + metric='default') + self.last_bboxes_ = self.bboxes + # return self.bboxes, self.kpss + else: + self.bboxes, self.kpss = self.det_model_scrfd.detect_faces(image, thresh=nms_thresh, max_num=0, + metric='default') + self.bboxes, self.kpss = self.tracking_filter() + else: + if 'scrfd' in self.mode: + self.bboxes, self.kpss = self.det_model_scrfd.detect_faces(self.image, thresh=nms_thresh, + max_num=max_num, + metric='default') + else: + pil_image = Image.fromarray(self.image) + min_bbox_size = 64 if min_bbox_size is None else min_bbox_size + self.bboxes, self.kpss = self.det_model_mtcnn.detect_faces(pil_image, min_face_size=min_bbox_size, + thresholds=[0.6, 0.7, 0.8], + nms_thresholds=[0.7, 0.7, 0.7]) + return self.bboxes, self.kpss + + def tracking_filter(self): + for i in range(len(self.bboxes)): + self.dis_list.append( + np.linalg.norm(Normalize(self.bboxes[i]).np_norm() - 
Normalize(self.last_bboxes_[0]).np_norm())) + if not self.dis_list: + return [], [] + best_index = np.argmin(np.array(self.dis_list)) + self.dis_list = [] + self.last_bboxes_ = [self.bboxes[best_index]] + return self.last_bboxes_, [self.kpss[best_index]] + + def bboxes_filter(self, min_bbox_size): + min_area = np.power(min_bbox_size, 2) + area_list = (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1]) + min_index = np.where(area_list < min_area) + self.bboxes = np.delete(self.bboxes, min_index, axis=0) + self.kpss = np.delete(self.kpss, min_index, axis=0) + + def get_single_face(self, crop_size, mode='mtcnn_512'): + """ + Args: + crop_size: + mode: default mtcnn_512 arcface_512 arcface default_95 + Returns: cv2 image + """ + assert mode in ['default', 'mtcnn_512', 'mtcnn_256', 'arcface_512', 'arcface', 'default_95'] + if self.bboxes.shape[0] == 0: + return None, None + det_score = self.bboxes[..., 4] + if self.tracking: + best_index = np.argmax(np.array(self.dis_list)) + kpss = None + if self.kpss is not None: + kpss = self.kpss[best_index] + else: + best_index = np.argmax(det_score) + kpss = None + if self.kpss is not None: + kpss = self.kpss[best_index] + align_img, M = norm_crop(self.image, kpss, crop_size, mode=mode) + align_img = cv2.cvtColor(align_img, cv2.COLOR_RGB2BGR) + return align_img, M + + def get_multi_face(self, crop_size, mode='mtcnn_512'): + """ + Args: + crop_size: + mode: default mtcnn_512 arcface_512 arcface + Returns: + """ + if self.bboxes.shape[0] == 0: + return None + align_img_list = [] + M_list = [] + for i in range(self.bboxes.shape[0]): + kps = None + if self.kpss is not None: + kps = self.kpss[i] + align_img, M = norm_crop(self.image, kps, crop_size, mode=mode) + align_img_list.append(align_img) + M_list.append(M) + return align_img_list, M_list + + def draw_face(self): + for i_ in range(self.bboxes.shape[0]): + bbox = self.bboxes[i_] + x1, y1, x2, y2, score = bbox.astype(int) + cv2.rectangle(self.image, (x1, y1), (x2, y2), (255, 0, 0), 2) + if self.kpss is not None: + kps = self.kpss[i_] + for kp in kps: + kp = kp.astype(int) + cv2.circle(self.image, tuple(kp), 1, (0, 0, 255), 2) + CVImage(self.image, image_format='cv2').show() diff --git a/face_feature/face_lib/face_detect_and_align/face_align_utils.py b/face_feature/face_lib/face_detect_and_align/face_align_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6dc544055e762f2e19753b34e749cbeb7ae76594 --- /dev/null +++ b/face_feature/face_lib/face_detect_and_align/face_align_utils.py @@ -0,0 +1,107 @@ +import cv2 +import numpy as np +from skimage import transform as trans + +src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007], + [51.157, 89.050], [57.025, 89.702]], + dtype=np.float32) +# <--left +src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111], + [45.177, 86.190], [64.246, 86.758]], + dtype=np.float32) + +# ---frontal +src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493], + [42.463, 87.010], [69.537, 87.010]], + dtype=np.float32) + +# -->right +src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111], + [48.167, 86.758], [67.236, 86.190]], + dtype=np.float32) + +# -->right profile +src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007], + [55.388, 89.702], [61.257, 89.050]], + dtype=np.float32) + +multi_src = np.array([src1, src2, src3, src4, src5]) +multi_src_map = {112: multi_src, 224: multi_src * 2, 512: multi_src * (512 / 112)} + +arcface_src = np.array( + [[38.2946, 
51.6963], [73.5318, 51.5014], [56.0252, 71.7366], + [41.5493, 92.3655], [70.7299, 92.2041]], + dtype=np.float32) + +# mtcnn_src = [ +# [30.29459953, 51.69630051], [65.53179932, 51.50139999], [48.02519989, 71.73660278], +# [33.54930115, 92.3655014], [62.72990036, 92.20410156] +# ] +# tmp_crop_size = np.array((96, 112)) +# size_diff = max(tmp_crop_size) - tmp_crop_size +# mtcnn_src += size_diff / 2 +# ref_pts = np.float32(mtcnn_src) +# ref_pts = (ref_pts - 112 / 2) * 0.85 + 112 / 2 +# ref_pts *= 512 / 112. +# mtcnn_src_512 = ref_pts +# print(mtcnn_src_512) + +mtcnn_512 = [[187.20187, 239.27705], + [324.1236, 238.51973], + [256.09793, 317.14795], + [199.84871, 397.30597], + [313.2362, 396.6788]] + +mtcnn_256 = np.array(mtcnn_512) * 0.5 + +arcface_src_512 = arcface_src * np.array([512 / 112, 512 / 112]) +arcface_src = np.expand_dims(arcface_src, axis=0) + + +def get_src_modify(srcs, arcface_src): + srcs += ((arcface_src[2] - srcs[2][2]) * np.array([1, 1.8]))[None] + return srcs + + +# lmk is prediction; src is template +def estimate_norm(lmk, image_size=112, mode='arcface'): + assert lmk.shape == (5, 2) + tform = trans.SimilarityTransform() + lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1) + min_M = [] + min_index = [] + min_error = float('inf') + if mode == 'arcface': + assert image_size == 112 + src = arcface_src + elif mode == 'arcface_512': + src = np.expand_dims(arcface_src_512, axis=0) + elif mode == 'mtcnn_512': + src = np.expand_dims(mtcnn_512, axis=0) + elif mode == 'mtcnn_256': + src = np.expand_dims(mtcnn_256, axis=0) + elif mode == 'default_95': + src = get_src_modify(multi_src, arcface_src[0]) + src_map = {112: src.copy(), 224: src.copy() * 2, 256: src.copy() * 256 / 112 * 0.95, + 512: src.copy() * (512 / 112) * 0.95} + src = src_map[image_size] + else: + src = multi_src_map[image_size] + for i in np.arange(src.shape[0]): + tform.estimate(lmk, src[i]) + M = tform.params[0:2, :] + results = np.dot(M, lmk_tran.T) + results = results.T + error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1))) + # print(error) + if error < min_error: + min_error = error + min_M = M + min_index = i + return min_M, min_index + + +def norm_crop(img, landmark, crop_size=112, mode='arcface'): + M, pose_index = estimate_norm(landmark, crop_size, mode) + warped = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0) + return warped, M diff --git a/face_feature/face_lib/face_detect_and_align/scrfd_insightface/__init__.py b/face_feature/face_lib/face_detect_and_align/scrfd_insightface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1b9dbd206e4408d04b0acb8aaf68e37dee0a05bf --- /dev/null +++ b/face_feature/face_lib/face_detect_and_align/scrfd_insightface/__init__.py @@ -0,0 +1,5 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/10 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +from .scrfd import SCRFD \ No newline at end of file diff --git a/face_feature/face_lib/face_detect_and_align/scrfd_insightface/scrfd.py b/face_feature/face_lib/face_detect_and_align/scrfd_insightface/scrfd.py new file mode 100644 index 0000000000000000000000000000000000000000..361bf5e139e872d48c595cb326400387385dde4a --- /dev/null +++ b/face_feature/face_lib/face_detect_and_align/scrfd_insightface/scrfd.py @@ -0,0 +1,294 @@ +# -*- coding: utf-8 -*- +# @Organization : insightface.ai +# @Author : Jia Guo +# @Time : 2021-05-04 +# @Function : + +from __future__ import division +import datetime +import numpy as np +import onnxruntime +import os +import os.path as osp 
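A minimal usage sketch for `norm_crop` from face_align_utils.py above: `estimate_norm` picks the best-fitting landmark template via a similarity transform, and `norm_crop` warps the face into a fixed-size crop. Here `img` and `kps` (an RGB image and a (5, 2) landmark array from the detector) are assumed inputs, not names defined in this diff:

from face_feature.face_lib.face_detect_and_align.face_align_utils import norm_crop
import cv2

aligned_face, M = norm_crop(img, kps, crop_size=256, mode='mtcnn_256')  # 256x256 crop, MTCNN template
M_inv = cv2.invertAffineTransform(M)  # inverse affine, used later when pasting the swapped face back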
+import cv2 +import sys + + +def softmax(z): + assert len(z.shape) == 2 + s = np.max(z, axis=1) + s = s[:, np.newaxis] # necessary step to do broadcasting + e_x = np.exp(z - s) + div = np.sum(e_x, axis=1) + div = div[:, np.newaxis] # dito + return e_x / div + + +def distance2bbox(points, distance, max_shape=None): + """Decode distance prediction to bounding box. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + distance (Tensor): Distance from the given point to 4 + boundaries (left, top, right, bottom). + max_shape (tuple): Shape of the image. + + Returns: + Tensor: Decoded bboxes. + """ + x1 = points[:, 0] - distance[:, 0] + y1 = points[:, 1] - distance[:, 1] + x2 = points[:, 0] + distance[:, 2] + y2 = points[:, 1] + distance[:, 3] + if max_shape is not None: + x1 = x1.clamp(min=0, max=max_shape[1]) + y1 = y1.clamp(min=0, max=max_shape[0]) + x2 = x2.clamp(min=0, max=max_shape[1]) + y2 = y2.clamp(min=0, max=max_shape[0]) + return np.stack([x1, y1, x2, y2], axis=-1) + + +def distance2kps(points, distance, max_shape=None): + """Decode distance prediction to bounding box. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + distance (Tensor): Distance from the given point to 4 + boundaries (left, top, right, bottom). + max_shape (tuple): Shape of the image. + + Returns: + Tensor: Decoded bboxes. + """ + preds = [] + for i in range(0, distance.shape[1], 2): + px = points[:, i % 2] + distance[:, i] + py = points[:, i % 2 + 1] + distance[:, i + 1] + if max_shape is not None: + px = px.clamp(min=0, max=max_shape[1]) + py = py.clamp(min=0, max=max_shape[0]) + preds.append(px) + preds.append(py) + return np.stack(preds, axis=-1) + + +class SCRFD: + def __init__(self, model_file=None, session=None): + import onnxruntime + self.model_file = model_file + self.session = session + self.taskname = 'detection' + self.batched = False + if self.session is None: + assert self.model_file is not None + # assert osp.exists(self.model_file) + self.session = onnxruntime.InferenceSession(self.model_file, providers=["CUDAExecutionProvider"]) + self.center_cache = {} + self.nms_thresh = 0.4 + self._init_vars() + + def _init_vars(self): + input_cfg = self.session.get_inputs()[0] + input_shape = input_cfg.shape + if isinstance(input_shape[2], str): + self.input_size = None + else: + self.input_size = tuple(input_shape[2:4][::-1]) + input_name = input_cfg.name + outputs = self.session.get_outputs() + if len(outputs[0].shape) == 3: + self.batched = True + output_names = [] + for o in outputs: + output_names.append(o.name) + self.input_name = input_name + self.output_names = output_names + self.use_kps = False + self._num_anchors = 1 + if len(outputs) == 6: + self.fmc = 3 + self._feat_stride_fpn = [8, 16, 32] + self._num_anchors = 2 + elif len(outputs) == 9: + self.fmc = 3 + self._feat_stride_fpn = [8, 16, 32] + self._num_anchors = 2 + self.use_kps = True + elif len(outputs) == 10: + self.fmc = 5 + self._feat_stride_fpn = [8, 16, 32, 64, 128] + self._num_anchors = 1 + elif len(outputs) == 15: + self.fmc = 5 + self._feat_stride_fpn = [8, 16, 32, 64, 128] + self._num_anchors = 1 + self.use_kps = True + + def prepare(self, ctx_id, **kwargs): + if ctx_id < 0: + self.session.set_providers(['CPUExecutionProvider']) + nms_thresh = kwargs.get('nms_thresh', None) + if nms_thresh is not None: + self.nms_thresh = nms_thresh + input_size = kwargs.get('input_size', None) + if input_size is not None: + if self.input_size is not None: + print('warning: det_size is already set in scrfd model, ignore') + else: + self.input_size = 
input_size + + def forward(self, img, thresh): + scores_list = [] + bboxes_list = [] + kpss_list = [] + input_size = tuple(img.shape[0:2][::-1]) + blob = cv2.dnn.blobFromImage(img, 1.0 / 128, input_size, (127.5, 127.5, 127.5), swapRB=False) + net_outs = self.session.run(self.output_names, {self.input_name: blob}) + + input_height = blob.shape[2] + input_width = blob.shape[3] + fmc = self.fmc + for idx, stride in enumerate(self._feat_stride_fpn): + # If model support batch dim, take first output + if self.batched: + scores = net_outs[idx][0] + bbox_preds = net_outs[idx + fmc][0] + bbox_preds = bbox_preds * stride + if self.use_kps: + kps_preds = net_outs[idx + fmc * 2][0] * stride + # If model doesn't support batching take output as is + else: + scores = net_outs[idx] + bbox_preds = net_outs[idx + fmc] + bbox_preds = bbox_preds * stride + if self.use_kps: + kps_preds = net_outs[idx + fmc * 2] * stride + + height = input_height // stride + width = input_width // stride + K = height * width + key = (height, width, stride) + if key in self.center_cache: + anchor_centers = self.center_cache[key] + else: + # solution-1, c style: + # anchor_centers = np.zeros( (height, width, 2), dtype=np.float32 ) + # for i in range(height): + # anchor_centers[i, :, 1] = i + # for i in range(width): + # anchor_centers[:, i, 0] = i + + # solution-2: + # ax = np.arange(width, dtype=np.float32) + # ay = np.arange(height, dtype=np.float32) + # xv, yv = np.meshgrid(np.arange(width), np.arange(height)) + # anchor_centers = np.stack([xv, yv], axis=-1).astype(np.float32) + + # solution-3: + anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32) + # print(anchor_centers.shape) + + anchor_centers = (anchor_centers * stride).reshape((-1, 2)) + if self._num_anchors > 1: + anchor_centers = np.stack([anchor_centers] * self._num_anchors, axis=1).reshape((-1, 2)) + if len(self.center_cache) < 100: + self.center_cache[key] = anchor_centers + + pos_inds = np.where(scores >= thresh)[0] + bboxes = distance2bbox(anchor_centers, bbox_preds) + pos_scores = scores[pos_inds] + pos_bboxes = bboxes[pos_inds] + scores_list.append(pos_scores) + bboxes_list.append(pos_bboxes) + if self.use_kps: + kpss = distance2kps(anchor_centers, kps_preds) + # kpss = kps_preds + kpss = kpss.reshape((kpss.shape[0], -1, 2)) + pos_kpss = kpss[pos_inds] + kpss_list.append(pos_kpss) + return scores_list, bboxes_list, kpss_list + + def detect_faces(self, img, thresh=0.5, input_size=None, max_num=0, metric='default'): + assert input_size is not None or self.input_size is not None + input_size = self.input_size if input_size is None else input_size + + im_ratio = float(img.shape[0]) / img.shape[1] + model_ratio = float(input_size[1]) / input_size[0] + if im_ratio > model_ratio: + new_height = input_size[1] + new_width = int(new_height / im_ratio) + else: + new_width = input_size[0] + new_height = int(new_width * im_ratio) + det_scale = float(new_height) / img.shape[0] + resized_img = cv2.resize(img, (new_width, new_height)) + det_img = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8) + det_img[:new_height, :new_width, :] = resized_img + + scores_list, bboxes_list, kpss_list = self.forward(det_img, thresh) + + scores = np.vstack(scores_list) + scores_ravel = scores.ravel() + order = scores_ravel.argsort()[::-1] + bboxes = np.vstack(bboxes_list) / det_scale + if self.use_kps: + kpss = np.vstack(kpss_list) / det_scale + pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False) + pre_det = pre_det[order, :] + keep 
= self.nms(pre_det) + det = pre_det[keep, :] + if self.use_kps: + kpss = kpss[order, :, :] + kpss = kpss[keep, :, :] + else: + kpss = None + if 0 < max_num < det.shape[0]: + area = (det[:, 2] - det[:, 0]) * (det[:, 3] - + det[:, 1]) + img_center = img.shape[0] // 2, img.shape[1] // 2 + offsets = np.vstack([ + (det[:, 0] + det[:, 2]) / 2 - img_center[1], + (det[:, 1] + det[:, 3]) / 2 - img_center[0] + ]) + offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) + if metric == 'max': + values = area + else: + values = area - offset_dist_squared * 2.0 # some extra weight on the centering + bindex = np.argsort( + values)[::-1] # some extra weight on the centering + bindex = bindex[0:max_num] + det = det[bindex, :] + if kpss is not None: + kpss = kpss[bindex, :] + return det, kpss + + def nms(self, dets): + thresh = self.nms_thresh + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep diff --git a/face_feature/face_lib/face_embedding/__init__.py b/face_feature/face_lib/face_embedding/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a57b368fc251d8e38fc54a7825f4fa20dae22d3 --- /dev/null +++ b/face_feature/face_lib/face_embedding/__init__.py @@ -0,0 +1 @@ +from .face_embedding import FaceEmbedding diff --git a/face_feature/face_lib/face_embedding/face_embedding.py b/face_feature/face_lib/face_embedding/face_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..8105022c0f6e52b86694e97a54f5f500166f42fc --- /dev/null +++ b/face_feature/face_lib/face_embedding/face_embedding.py @@ -0,0 +1,63 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/10 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +from cv2box import CVImage, MyFpsCounter +from cv2box.utils.math import Normalize +from model_lib import ModelBase +import torchvision.transforms as transforms +import numpy as np +from PIL import Image +import torch +import torch.nn.functional as F +import cv2 +def down_sample(target_, size): + import torch.nn.functional as F + return F.interpolate(target_, size=size, mode='bilinear', align_corners=True) + + +MODEL_ZOO = { + 'CurricularFace-tjm': { + 'model_path': 'pretrain_models/CurricularFace.tjm', + } +} + +class FaceEmbedding(ModelBase): + def __init__(self, model_type='CurricularFace-tjm', provider='gpu'): + super().__init__(MODEL_ZOO[model_type], provider) + self.model_type = model_type + self.input_std = self.input_mean = 127.5 + self.input_size = (112, 112) + self.transformer = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ]) + self.gpu = True if provider=='gpu' else False + + def latent_from_image(self, face_image): + if type(face_image) == str: + face_image = cv2.imread(face_image) + # face_image = cv2.resize(face_image, (224, 224)) + face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB) + face_image = Image.fromarray(face_image) + elif type(face_image) == np.ndarray: + face_image = 
Image.fromarray(face_image) + # print('got np array, assert its cv2 output.') + with torch.no_grad(): + face = self.transformer(face_image) + face = face.unsqueeze(0) + if self.gpu: + face = face.cuda() + # 输入尺寸为(112, 112) RGB + face = down_sample(face, size=[112, 112]) + # 人脸latent code为512维 + face_latent = self.model(face) + face_latent = F.normalize(face_latent, p=2, dim=1) + return face_latent[0] + +if __name__ == '__main__': + # CurricularFace + fb_cur = FaceEmbedding(model_type='CurricularFace-tjm', provider='gpu') + latent_cur = fb_cur.latent_from_image('data/source/ym-1.jpeg') + print(latent_cur.shape) + print(latent_cur) \ No newline at end of file diff --git a/face_feature/face_lib/face_landmark/pfpld.py b/face_feature/face_lib/face_landmark/pfpld.py new file mode 100644 index 0000000000000000000000000000000000000000..c2912fa681773cafa7c6f0d1ac4f95d7ab0568a7 --- /dev/null +++ b/face_feature/face_lib/face_landmark/pfpld.py @@ -0,0 +1,34 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/25 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +""" +Ref https://github.com/hanson-young/nniefacelib/tree/master/PFPLD/models/onnx +""" +import cv2 +import onnxruntime as ort +import numpy as np + +from cv2box import CVImage +from model_lib import ModelBase +from face_feature.face_lib.face_landmark.utils import convert98to68 + +MODEL_ZOO = { + 'pfpld': { + 'model_path': 'pretrain_models/pfpld_robust_sim_bs1_8003.onnx', + 'model_input_size': (112, 112), + }, +} + + +class PFPLD(ModelBase): + def __init__(self, model_name='pfpld', provider='gpu', cpu=False): + super().__init__(MODEL_ZOO[model_name], provider) + + def forward(self, face_image): + input_image_shape = face_image.shape + face_image = CVImage(face_image).resize((112, 112)).bgr + face_image = (face_image / 255).astype(np.float32) + pred = self.model.forward(face_image, trans=True) + pred = convert98to68(pred[1]) + return pred.reshape(-1, 68, 2) * input_image_shape[:2][::-1] diff --git a/face_feature/face_lib/face_landmark/utils.py b/face_feature/face_lib/face_landmark/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..93fdba7d9ea3445d4442d82ee40b6e47e18ec112 --- /dev/null +++ b/face_feature/face_lib/face_landmark/utils.py @@ -0,0 +1,77 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/25 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +import numpy as np + + +def convert98to68(list_info): + points = list_info[0, 0:196] + info_68 = [] + for j in range(17): + x = points[j * 2 * 2 + 0] + y = points[j * 2 * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(33, 38): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(42, 47): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(51, 61): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + point_38_x = (float(points[60 * 2 + 0]) + float(points[62 * 2 + 0])) / 2.0 + point_38_y = (float(points[60 * 2 + 1]) + float(points[62 * 2 + 1])) / 2.0 + point_39_x = (float(points[62 * 2 + 0]) + float(points[64 * 2 + 0])) / 2.0 + point_39_y = (float(points[62 * 2 + 1]) + float(points[64 * 2 + 1])) / 2.0 + point_41_x = (float(points[64 * 2 + 0]) + float(points[66 * 2 + 0])) / 2.0 + point_41_y = (float(points[64 * 2 + 1]) + float(points[66 * 2 + 1])) / 2.0 + point_42_x = (float(points[60 * 2 + 0]) + float(points[66 * 2 + 0])) / 2.0 + point_42_y = (float(points[60 * 2 + 1]) + float(points[66 * 2 + 
1])) / 2.0 + point_44_x = (float(points[68 * 2 + 0]) + float(points[70 * 2 + 0])) / 2.0 + point_44_y = (float(points[68 * 2 + 1]) + float(points[70 * 2 + 1])) / 2.0 + point_45_x = (float(points[70 * 2 + 0]) + float(points[72 * 2 + 0])) / 2.0 + point_45_y = (float(points[70 * 2 + 1]) + float(points[72 * 2 + 1])) / 2.0 + point_47_x = (float(points[72 * 2 + 0]) + float(points[74 * 2 + 0])) / 2.0 + point_47_y = (float(points[72 * 2 + 1]) + float(points[74 * 2 + 1])) / 2.0 + point_48_x = (float(points[68 * 2 + 0]) + float(points[74 * 2 + 0])) / 2.0 + point_48_y = (float(points[68 * 2 + 1]) + float(points[74 * 2 + 1])) / 2.0 + info_68.append((point_38_x)) + info_68.append((point_38_y)) + info_68.append((point_39_x)) + info_68.append((point_39_y)) + info_68.append(points[64 * 2 + 0]) + info_68.append(points[64 * 2 + 1]) + info_68.append((point_41_x)) + info_68.append((point_41_y)) + info_68.append((point_42_x)) + info_68.append((point_42_y)) + info_68.append(points[68 * 2 + 0]) + info_68.append(points[68 * 2 + 1]) + info_68.append((point_44_x)) + info_68.append((point_44_y)) + info_68.append((point_45_x)) + info_68.append((point_45_y)) + info_68.append(points[72 * 2 + 0]) + info_68.append(points[72 * 2 + 1]) + info_68.append((point_47_x)) + info_68.append((point_47_y)) + info_68.append((point_48_x)) + info_68.append((point_48_y)) + for j in range(76, 96): + x = points[j * 2 + 0] + y = points[j * 2 + 1] + info_68.append(x) + info_68.append(y) + for j in range(len(list_info[196:])): + info_68.append(list_info[196 + j]) + return np.array(info_68) diff --git a/face_feature/hifi_image_api.py b/face_feature/hifi_image_api.py new file mode 100644 index 0000000000000000000000000000000000000000..98bfa3a2a2a165b5d0709ab8c4553fd91743c170 --- /dev/null +++ b/face_feature/hifi_image_api.py @@ -0,0 +1,54 @@ +# import os +# import sys +# path = os.path.dirname(__file__) +# sys.path.append(path) +from face_feature.face_lib.face_landmark.pfpld import PFPLD +from face_feature.face_lib.face_embedding import FaceEmbedding +from face_feature.face_lib.face_detect_and_align import FaceDetect5Landmarks +import cv2 +import numpy as np +from cv2box import CVImage +from PIL import Image +class HifiImage: + def __init__(self, crop_size=256): + """ + :param crop_size: 输出字典中展示图片的size + """ + self.crop_size = crop_size + + self.fe = FaceEmbedding(model_type='CurricularFace-tjm', provider='gpu') + self.scrfd_detector = FaceDetect5Landmarks(mode='scrfd_500m') + self.pfpld = PFPLD() + + self.image_feature_dict = {} + + + def get_face_feature(self, image_path): + if isinstance(image_path, str): + src_image = CVImage(image_path).rgb() + else: + src_image = np.array(image_path) + try: + borderpad = int(np.max([np.max(src_image.shape[:2]) * 0.1, 100])) + src_image = np.pad(src_image, ((borderpad, borderpad), (borderpad, borderpad), (0, 0)), 'constant', + constant_values=(0, 0)) + except Exception as e: + print(f'padding fail , got {e}') + return None + bboxes_scrfd, kpss_scrfd = self.scrfd_detector.get_bboxes(src_image, min_bbox_size=64) + image_face_crop_list, m_ = self.scrfd_detector.get_multi_face(crop_size=self.crop_size, + mode='mtcnn_256') + + img = np.array(image_face_crop_list[0]) + lm = self.pfpld.forward(img) + lm[0][5][0] = np.min([lm[0][5][0], lm[0][48][0] - 5]) + lm[0][14][0] = np.max([lm[0][14][0], lm[0][54][0] + 5]) + + img = cv2.rectangle(img, lm[0][11].ravel().astype(int), lm[0][14].ravel().astype(int), (0, 0, 0), -1) + img = cv2.rectangle(img, lm[0][2].ravel().astype(int), lm[0][5].ravel().astype(int), (0, 0, 0), 
-1) + + assert len(image_face_crop_list) == 1, 'only support single face in input image' + image_latent = self.fe.latent_from_image(img).cpu().numpy() + # image_latent = self.fe.forward(img) + crop_face = image_face_crop_list[0] + return image_latent, crop_face diff --git a/face_feature/model_lib/__init__.py b/face_feature/model_lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c8223f4ba295fe8974e978f67af58316326b29a9 --- /dev/null +++ b/face_feature/model_lib/__init__.py @@ -0,0 +1,3 @@ +from .base_wrapper import ONNXModel +from .model_base import ModelBase + diff --git a/face_feature/model_lib/base_wrapper/__init__.py b/face_feature/model_lib/base_wrapper/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..177e2c1426a7d39f1748f9b02523e8988502b091 --- /dev/null +++ b/face_feature/model_lib/base_wrapper/__init__.py @@ -0,0 +1,7 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/26 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +from .onnx_model import ONNXModel +from .onnx_model_picklable import OnnxModelPickable + diff --git a/face_feature/model_lib/base_wrapper/onnx_model.py b/face_feature/model_lib/base_wrapper/onnx_model.py new file mode 100644 index 0000000000000000000000000000000000000000..f2645f858463bd64235ab7369b6bec4149a63b44 --- /dev/null +++ b/face_feature/model_lib/base_wrapper/onnx_model.py @@ -0,0 +1,146 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/29 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +""" +todo: io_binding https://onnxruntime.ai/docs/api/python/api_summary.html +""" + +import onnxruntime +import numpy as np +from cv2box import MyFpsCounter + + +def get_output_info(onnx_session): + output_name = [] + output_shape = [] + for node in onnx_session.get_outputs(): + output_name.append(node.name) + output_shape.append(node.shape) + return output_name, output_shape + + +def get_input_info(onnx_session): + input_name = [] + input_shape = [] + for node in onnx_session.get_inputs(): + input_name.append(node.name) + input_shape.append(node.shape) + return input_name, input_shape + + +def get_input_feed(input_name, image_tensor): + """ + Args: + input_name: + image_tensor: [image tensor, ...] 
+ Returns: + """ + input_feed = {} + for index, name in enumerate(input_name): + input_feed[name] = image_tensor[index] + return input_feed + + +class ONNXModel: + def __init__(self, onnx_path, provider='gpu', debug=False, input_dynamic_shape=None): + self.provider = provider + + if self.provider == 'gpu': + self.providers = ( + "CUDAExecutionProvider", + {'device_id': 0, } + ) + elif self.provider == 'trt': + self.providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_engine_cache_path': './cache/trt', 'trt_fp16_enable': False, } + ) + elif self.provider == 'trt16': + self.providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_engine_cache_path': './cache/trt', 'trt_fp16_enable': True, + 'trt_dla_enable': False, } + ) + elif self.provider == 'trt8': + self.providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_int8_enable': 1, } + ) + else: + self.providers = "CPUExecutionProvider" + + # onnxruntime.set_default_logger_severity(3) + session_options = onnxruntime.SessionOptions() + session_options.log_severity_level = 3 + self.onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[self.providers]) + + # sessionOptions.intra_op_num_threads = 3 + self.input_name, self.input_shape = get_input_info(self.onnx_session) + self.output_name, self.output_shape = get_output_info(self.onnx_session) + + self.input_dynamic_shape = input_dynamic_shape + + if self.input_dynamic_shape is not None: + self.input_dynamic_shape = self.input_dynamic_shape if isinstance(self.input_dynamic_shape, list) else [ + self.input_dynamic_shape] + + if debug: + print('onnx version: {}'.format(onnxruntime.__version__)) + print("input_name:{}, shape:{}".format(self.input_name, self.input_shape)) + print("output_name:{}, shape:{}".format(self.output_name, self.output_shape)) + + self.warm_up() + + def warm_up(self): + if not self.input_dynamic_shape: + try: + self.forward([np.random.rand(*self.input_shape[i]).astype(np.float32) + for i in range(len(self.input_shape))]) + except TypeError: + print('Model may be dynamic, plz name the \'input_dynamic_shape\' !') + else: + self.forward([np.random.rand(*self.input_dynamic_shape[i]).astype(np.float32) + for i in range(len(self.input_shape))]) + print('Model warm up done !') + + def speed_test(self): + if not self.input_dynamic_shape: + input_tensor = [np.random.rand(*self.input_shape[i]).astype(np.float32) + for i in range(len(self.input_shape))] + else: + input_tensor = [np.random.rand(*self.input_dynamic_shape[i]).astype(np.float32) + for i in range(len(self.input_shape))] + + with MyFpsCounter('[{}] onnx 10 times'.format(self.provider)) as mfc: + for i in range(10): + _ = self.forward(input_tensor) + + def forward(self, image_tensor_in, trans=False): + """ + Args: + image_tensor_in: image_tensor [image_tensor] [image_tensor_1, image_tensor_2] + trans: apply trans for image_tensor or first image_tensor(list) + Returns: + model output + """ + if not isinstance(image_tensor_in, list) or len(image_tensor_in) == 1: + image_tensor_in = image_tensor_in[0] if isinstance(image_tensor_in, list) else image_tensor_in + if trans: + image_tensor_in = image_tensor_in.transpose(2, 0, 1)[np.newaxis, :] + image_tensor_in = [np.ascontiguousarray(image_tensor_in)] + else: + # for multi input, only trans first tensor + if trans: + image_tensor_in[0] = image_tensor_in[0].transpose(2, 0, 1)[np.newaxis, :] + image_tensor_in = [np.ascontiguousarray(image_tensor) for image_tensor in 
image_tensor_in] + + input_feed = get_input_feed(self.input_name, image_tensor_in) + return self.onnx_session.run(self.output_name, input_feed=input_feed) + + def batch_forward(self, bach_image_tensor, trans=False): + if trans: + bach_image_tensor = bach_image_tensor.transpose(0, 3, 1, 2) + input_feed = get_input_feed(self.input_name, bach_image_tensor) + return self.onnx_session.run(self.output_name, input_feed=input_feed) diff --git a/face_feature/model_lib/base_wrapper/onnx_model_picklable.py b/face_feature/model_lib/base_wrapper/onnx_model_picklable.py new file mode 100644 index 0000000000000000000000000000000000000000..0a15a65d87e570b5b1701554fabdfa8bb772da38 --- /dev/null +++ b/face_feature/model_lib/base_wrapper/onnx_model_picklable.py @@ -0,0 +1,116 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/29 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +# -*-coding: utf-8 -*- + +import onnxruntime +import numpy as np +from cv2box import MyFpsCounter + + +def init_session(onnx_path, provider='gpu'): + if provider == 'gpu': + providers = ( + "CUDAExecutionProvider", + {'device_id': 0, } + ) + elif provider == 'trt': + providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_fp16_enable': False, } + ) + elif provider == 'trt16': + providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_fp16_enable': True, } + ) + elif provider == 'trt8': + providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_int8_enable': True, } + ) + else: + providers = "CPUExecutionProvider" + + # onnxruntime.set_default_logger_severity(3) + session_options = onnxruntime.SessionOptions() + session_options.log_severity_level = 3 + onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[providers]) + return onnx_session + + +class OnnxModelPickable: # This is a wrapper to make the current InferenceSession class pickable. 
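# Note on this wrapper: onnxruntime.InferenceSession wraps native resources and cannot be pickled,
# so it cannot be handed to a worker process directly. The wrapper therefore stores only
# (onnx_path, provider); __getstate__/__setstate__ below rebuild the session after unpickling.
# Illustrative round-trip (the model path is a placeholder, not a file shipped in this diff):
#   import pickle
#   model = OnnxModelPickable('pretrain_models/example.onnx', provider='cpu')
#   model = pickle.loads(pickle.dumps(model))  # __setstate__ re-creates the ONNX session here
#   output = model.forward(image_array, trans=True)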
+ def __init__(self, onnx_path, provider='gpu'): + self.onnx_path = onnx_path + self.provider = provider + # self.onnx_session = init_session(self.onnx_path, self.provider) + + def get_input_feed(self, input_name, image_tensor): + """ + input_feed={self.input_name: image_tensor} + :param input_name: + :param image_tensor: + :return: + """ + input_feed = {} + for name in input_name: + input_feed[name] = image_tensor + return input_feed + + def get_output_info(self, onnx_session): + """ + output_name = onnx_session.get_outputs()[0].name + :param onnx_session: + :return: + """ + output_name = [] + output_shape = [] + for node in onnx_session.get_outputs(): + output_name.append(node.name) + output_shape.append(node.shape) + return output_name, output_shape + + def get_input_info(self, onnx_session): + """ + input_name = onnx_session.get_inputs()[0].name + :param onnx_session: + :return: + """ + input_name = [] + input_shape = [] + for node in onnx_session.get_inputs(): + input_name.append(node.name) + input_shape.append(node.shape) + return input_name, input_shape + + def forward(self, image_tensor, trans=False): + ''' + image_tensor = image.transpose(2, 0, 1) + image_tensor = image_tensor[np.newaxis, :] + onnx_session.run([output_name], {input_name: x}) + :param image_tensor: + :return: + ''' + # 输入数据的类型必须与模型一致,以下三种写法都是可以的 + # scores, boxes = self.onnx_session.run(None, {self.input_name: image_tensor}) + # scores, boxes = self.onnx_session.run(self.output_name, input_feed={self.input_name: image_tensor}) + if trans: + image_tensor = image_tensor.transpose(2, 0, 1) + image_tensor = image_tensor[np.newaxis, :] + image_tensor = np.ascontiguousarray(image_tensor) + input_name, _ = self.get_input_info(self.onnx_session) + output_name, _ = self.get_output_info(self.onnx_session) + input_feed = self.get_input_feed(input_name, image_tensor) + return self.onnx_session.run(output_name, input_feed=input_feed) + + def __getstate__(self): + return { + 'onnx_path': self.onnx_path, + 'provider': self.provider, + } + + def __setstate__(self, values): + self.onnx_path = values['onnx_path'] + self.provider = values['provider'] + self.onnx_session = init_session(self.onnx_path, self.provider) diff --git a/face_feature/model_lib/model_base.py b/face_feature/model_lib/model_base.py new file mode 100644 index 0000000000000000000000000000000000000000..b320fb195008897e33b7f1494f78b9d1acf9a116 --- /dev/null +++ b/face_feature/model_lib/model_base.py @@ -0,0 +1,38 @@ +# -- coding: utf-8 -- +# @Time : 2022/7/29 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +from .base_wrapper import ONNXModel, OnnxModelPickable +from pathlib import Path + +try: + from .base_wrapper import TRTWrapper +except: + print('trt model needs tensorrt !') + + +class ModelBase: + def __init__(self, model_info, provider): + self.model_path = model_info['model_path'] + + if 'input_dynamic_shape' in model_info.keys(): + self.input_dynamic_shape = model_info['input_dynamic_shape'] + else: + self.input_dynamic_shape = None + + if 'picklable' in model_info.keys(): + picklable = model_info['picklable'] + else: + picklable = False + + # init model + if Path(self.model_path).suffix == '.engine': + self.model_type = 'trt' + self.model = TRTWrapper(self.model_path) + else: + self.model_type = 'onnx' + if not picklable: + self.model = ONNXModel(self.model_path, provider=provider, input_dynamic_shape=self.input_dynamic_shape) + else: + self.model = OnnxModelPickable(self.model_path, provider=provider, ) diff --git 
a/face_feature/static/style.css b/face_feature/static/style.css new file mode 100644 index 0000000000000000000000000000000000000000..5007443984b8275afcbb7d30bf74700dac070a25 --- /dev/null +++ b/face_feature/static/style.css @@ -0,0 +1,41 @@ +@import url('http://fonts.googleapis.com/css?family=Open+Sans:400,300'); + +* { margin: 0; padding: 0; } + +html +{ + position: relative; + min-height: 100%; +} + +body +{ + font-family: 'Open Sans', Helvetica; + margin-bottom: 60px; + +} + +.footer { + position: absolute; + bottom: 0; + width: 100%; + /* Set the fixed height of the footer here */ + height: 60px; + line-height: 60px; /* Vertically center the text there */ + background-color: #f5f5f5; + font-size: 14px; + } + +.loader { + border: 8px solid #f3f3f3; /* Light grey */ + border-top: 8px solid #3498db; /* Blue */ + border-radius: 50%; + width: 50px; + height: 50px; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} \ No newline at end of file diff --git a/face_feature/templates/index.html b/face_feature/templates/index.html new file mode 100644 index 0000000000000000000000000000000000000000..c18d7b5456ed222d08b946210cb05bac3526c490 --- /dev/null +++ b/face_feature/templates/index.html @@ -0,0 +1,97 @@ + + + + + + AI html demo + + + + + + +
+    [index.html template body: heading "face align and latent extract"; image-upload form with the note "must be image format, < 10M"; "API Demo" section showing
+        curl -X POST -F file=@test.jpg http://192.168.6.190:5005/predict
+    with response fields 'latent', 'crop_face', 'status']
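The template above documents the face-feature HTTP endpoint only through curl. A hypothetical Python client for the same endpoint (host, port and the 'file' field are taken from the template; the serving code itself is not part of this diff, so treat this as a sketch):

import requests

resp = requests.post('http://192.168.6.190:5005/predict',
                     files={'file': open('test.jpg', 'rb')})
result = resp.json()  # the page lists 'latent', 'crop_face' and 'status' as response fields
print(result['status'])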
+ + + + + + + + \ No newline at end of file diff --git a/face_feature/utils/__init__.py b/face_feature/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..236edbfffa49ff32a66bd048e96b00a304dddb9e --- /dev/null +++ b/face_feature/utils/__init__.py @@ -0,0 +1,4 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/19 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power diff --git a/face_feature/utils/data_encrypt.py b/face_feature/utils/data_encrypt.py new file mode 100644 index 0000000000000000000000000000000000000000..b3ebceb65ba5100d804a450deeab9643734006e5 --- /dev/null +++ b/face_feature/utils/data_encrypt.py @@ -0,0 +1,83 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/26 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +# pip install pycryptodome +from Crypto.Cipher import AES +import operator +import time + +AES_BLOCK_SIZE = AES.block_size # AES 加密数据块大小, 只能是16 +AES_KEY_SIZE = 16 # AES 密钥长度(单位字节),可选 16、24、32,对应 128、192、256 位密钥 + + +# 待加密文本补齐到 block size 的整数倍 +def PadTest(bytes): + while len(bytes) % AES_BLOCK_SIZE != 0: # 循环直到补齐 AES_BLOCK_SIZE 的倍数 + bytes += ' '.encode() # 通过补空格(不影响源文件的可读)来补齐 + return bytes # 返回补齐后的字节列表 + + +# 待加密的密钥补齐到对应的位数 +def PadKey(key): + if len(key) > AES_KEY_SIZE: # 如果密钥长度超过 AES_KEY_SIZE + return key[:AES_KEY_SIZE] # 截取前面部分作为密钥并返回 + while len(key) % AES_KEY_SIZE != 0: # 不到 AES_KEY_SIZE 长度则补齐 + key += ' '.encode() # 补齐的字符可用任意字符代替 + return key # 返回补齐后的密钥 + + +# AES 加密 +def EnCrypt(key, bytes): + myCipher = AES.new(key, AES.MODE_ECB) # 新建一个 AES 算法实例,使用 ECB(电子密码本)模式 + encryptData = myCipher.encrypt(bytes) # 调用加密方法,得到加密后的数据 + return encryptData # 返回加密数据 + + +# AES 解密 +def DeCrypt(key, encryptData): + myCipher = AES.new(key, AES.MODE_ECB) # 新建一个 AES 算法实例,使用 ECB(电子密码本)模式 + bytes = myCipher.decrypt(encryptData) # 调用解密方法,得到解密后的数据 + return bytes # 返回解密数据 + + +def load_encrypt_model(model_path, key='test'): + with open(model_path, 'rb') as f_: + bytes_aes = f_.read() + bytes = DeCrypt(PadKey(key.encode()), bytes_aes) + pad_len = int(model_path.split('_')[-1].split('.')[0]) + return bytes[:-pad_len] + + +if __name__ == '__main__': + + key = "test" + file_path = '' + output_path = '' + + with open(file_path, 'rb') as f: # 以二进制模式打开文件 + bytes_ori = f.read() # 将文件内容读取出来到字节列表中 + print('源文件长度:{}'.format(len(bytes_ori))) + + key = PadKey(key.encode()) # 将密钥转换位字节列表并补齐密钥 + bytes = PadTest(bytes_ori) # 补齐原始数据 + print('补齐后的源文件长度:{}'.format(len(bytes))) + + encryptTest = EnCrypt(key, bytes) # 利用密钥对原始数据进行加密 + + start_time = time.time() + decryptTest = DeCrypt(key, encryptTest) # 利用密钥对加密的数据进行解密 + dt = time.time() - start_time + print("解密时间: ", dt) + + pad_len = len(bytes) - len(bytes_ori) + print(pad_len) + decryptTest = decryptTest[:-pad_len] + print('补齐长度:{} '.format(len(decryptTest))) + if operator.eq(bytes_ori, decryptTest): # 检查加解密是否成功 + print('AES 加解密成功!') + with open(output_path, "wb") as fo: + fo.write(encryptTest) + else: + print('AES 加解密失败,解密数据与元数据不相等') diff --git a/face_feature/utils/output_excel.py b/face_feature/utils/output_excel.py new file mode 100644 index 0000000000000000000000000000000000000000..58b370a6321bc3fd0fc5775e446e1e0208b381a5 --- /dev/null +++ b/face_feature/utils/output_excel.py @@ -0,0 +1,41 @@ +# -- coding: utf-8 -- +# @Time : 2021/12/20 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +import os +import xlrd +from openpyxl import load_workbook +from openpyxl.drawing.image import Image +from cv2box import CVFile, CVImage + +image_dir = './data/face_145/' +xlsx_path = 
'./test.xlsx' + +# 打开基础信息sheet +wb = load_workbook(xlsx_path) +sheet = wb["基本信息"] + + +def insert_image(insert_location, image_path): + img = Image(image_path) + new_size = (256, 256) + img.width, img.height = new_size + + sheet[insert_location] = "" + sheet.add_image(img, insert_location) + + wb.save(xlsx_path) + print("插入成功!") + + +pkl_data = CVFile('./data/image_feature_dict.pkl').data +# print(pkl_data) + +for k, v in pkl_data.items(): + print("B" + k) + img = CVImage(v[1]).save('./temp/test{}.jpg'.format(k)) + insert_image("B" + k, './temp/test{}.jpg'.format(k)) + +wb.save(xlsx_path) +wb.close() diff --git a/face_lib/face_swap/__init__.py b/face_lib/face_swap/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6cb5f62674835fd7e785075e2549c582173dd4d7 --- /dev/null +++ b/face_lib/face_swap/__init__.py @@ -0,0 +1,5 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/25 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +from .hififace_api import HifiFace diff --git a/face_lib/face_swap/hififace_api.py b/face_lib/face_swap/hififace_api.py new file mode 100644 index 0000000000000000000000000000000000000000..e97d027788607fc1243c0219da9ab9997bb27ce6 --- /dev/null +++ b/face_lib/face_swap/hififace_api.py @@ -0,0 +1,35 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/25 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +import numpy as np +from model_lib import ModelBase + +MODEL_ZOO = { + 'er8_bs1': { + 'model_path': 'pretrain_models/9O_865k.onnx', + }, +} + + +class HifiFace(ModelBase): + def __init__(self, model_name='er8_bs1', provider='gpu'): + super().__init__(MODEL_ZOO[model_name], provider) + + def forward(self, src_face_image, dst_face_latent): + """ + Args: + src_face_image: + dst_face_latent: + Returns: + """ + img_tensor = ((src_face_image.transpose(2, 0, 1) / 255.0) * 2 - 1)[None] + blob = [img_tensor.astype(np.float32), dst_face_latent.astype(np.float32)] + output = self.model.forward(blob) + # print("-------------model_type:",self.model_type) + if self.model_type == 'trt': + mask, swap_face = output + else: + swap_face, mask = output + + return mask, swap_face diff --git a/face_lib/face_swap/utils.py b/face_lib/face_swap/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9ca40c5b036e21ad59c3eb25b912b26c15fc6994 --- /dev/null +++ b/face_lib/face_swap/utils.py @@ -0,0 +1,12 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/26 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +# def reverse2wholeimage_hifi(swaped_img, mat_rev, img_mask, frame_wait_merge, orisize): +# swaped_img = swaped_img.cpu().numpy().transpose((1, 2, 0)) +# target_image = cv2.warpAffine(swaped_img, mat_rev, orisize) +# img = ne.evaluate('img_mask * (target_image * 255) ')[..., ::-1] +# img = ne.evaluate('img + frame_wait_merge') +# final_img = img.astype(np.uint8) +# return final_img \ No newline at end of file diff --git a/face_restore/__init__.py b/face_restore/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d8a17245b714811662a43b604bd6be1aa6e9a81f --- /dev/null +++ b/face_restore/__init__.py @@ -0,0 +1,2 @@ + +from .gfpgan_onnx_api import GFPGAN diff --git a/face_restore/gfpgan_onnx_api.py b/face_restore/gfpgan_onnx_api.py new file mode 100644 index 0000000000000000000000000000000000000000..c764a4d29b35e22557fc001bef5905daa39ba327 --- /dev/null +++ b/face_restore/gfpgan_onnx_api.py @@ -0,0 +1,67 @@ +# -- coding: utf-8 -- +# @Time : 2022/11/8 + + +from cv2box import CVImage, 
MyFpsCounter +from model_lib import ModelBase +import numpy as np +import cv2 + +MODEL_ZOO = { + # https://github.com/xuanandsix/GFPGAN-onnxruntime-demo + # input_name:['input'], shape:[[1, 3, 512, 512]] + # output_name:['1392'], shape:[[1, 3, 512, 512]] + 'GFPGANv1.4': { + 'model_path': './pretrain_models/gfpganv14_fp32_bs1_scale.onnx' + }, + 'codeformer': { + 'model_path':'./pretrain_models/codeformer_fp32_bs1_scale_adain.onnx' + }, + +} + + +class GFPGAN(ModelBase): + def __init__(self, model_type='GFPGANv1.4', provider='gpu'): + super().__init__(MODEL_ZOO[model_type], provider) + self.model_type = model_type + self.input_std = self.input_mean = 127.5 + self.input_size = (512, 512) + self.model_type = model_type + + def forward(self, face_image): + """ + Args: + face_image: cv2 image -1~1 RGB + Returns: + RGB 256x256x3 -1~1 + """ + face_image = (face_image + 1) / 2 + face_image_h, face_image_w, _ = face_image.shape + if face_image_h != 512: + face_image = cv2.resize(face_image, (512, 512)) + + face_image = np.uint8(face_image * 255.0) + # image_in = CVImage(face_image).blob(self.input_size, self.input_mean, self.input_std, rgb=False) + image_in = CVImage(face_image).set_blob(self.input_std, self.input_mean, self.input_size).blob_in(rgb=False) + if 'codeformer' in self.model_type: + image_out = self.model.forward([image_in,np.array(1,dtype=np.float32)]) + else: + image_out = self.model.forward(image_in) + + # print(image_out[0][0].shape) + output_face = ((image_out[0][0] + 1) / 2).transpose(1, 2, 0).clip(0, 1) + if face_image_h != 512: + output_face = cv2.resize(output_face, (face_image_w, face_image_h)) + output_face = (output_face * 2 - 1.0) + return output_face + + +if __name__ == '__main__': + face_img_p = 'data/source/ym-1.jpeg' + fa = GFPGAN(model_type='GFPGANv1.4', provider='gpu') + with MyFpsCounter() as mfc: + for i in range(10): + face = fa.forward(face_img_p) + # CVImage(face, image_format='cv2').save('./gfpgan.jpg') + CVImage(face, image_format='cv2').show() diff --git a/face_restore/xseg_onnx_api.py b/face_restore/xseg_onnx_api.py new file mode 100644 index 0000000000000000000000000000000000000000..91196c03c7c33c7c12fc6d2cd6d5a7c8708da2c2 --- /dev/null +++ b/face_restore/xseg_onnx_api.py @@ -0,0 +1,57 @@ +# -- coding: utf-8 -- +# @Time : 2022/11/8 + + +from cv2box import CVImage, MyFpsCounter + +from model_lib import ModelBase +import numpy as np +import cv2 + +MODEL_ZOO = { + 'xseg_0611': { + 'model_path': './pretrain_models/xseg_230611_16_17.onnx', + 'input_dynamic_shape': [[1, 256, 256, 3]] + }, +} + + +class XSEG(ModelBase): + def __init__(self, model_type='xseg_0611', provider='cpu'): + super().__init__(MODEL_ZOO[model_type], provider) + self.model_type = model_type + + + def forward(self, face_image): + """ + Args: + face_image: cv2 image -1~1 RGB + Returns: + RGB 256x256x3 -1~1 + """ + face_image = (face_image + 1) / 2 + if face_image.shape[-1] >= 4: + if len(face_image.shape)>3: + face_image = face_image[0] + face_image = face_image.transpose(1, 2, 0) + face_image_h, face_image_w, _ = face_image.shape + if face_image_h != 256: + face_image = cv2.resize(face_image, (256, 256)) + image_out = self.model.forward(face_image[...,::-1][None].astype(np.float32)) + # print(image_out[0][0].shape) + output_face = (image_out[0].squeeze()).clip(0, 1) + if face_image_h != 256: + output_face = cv2.resize(output_face, (face_image_w, face_image_h)) + return output_face + + +if __name__ == '__main__': + face_img_p = 'data/source/ym-1.jpeg' + fa = XSEG(model_type='xseg_0611', 
provider='trt16') + face_img = (cv2.resize(cv2.imread(face_img_p)/127.5-1,(512,512)))[...,::-1] + + with MyFpsCounter() as mfc: + for i in range(20): + face = fa.forward(face_img) + # CVImage(face, image_format='cv2').save('./xseg.jpg') + #CVImage(face, image_format='cv2').show() diff --git a/inference.py b/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..4db3a1f11eb65d98b444456275db5532e5c1a17e --- /dev/null +++ b/inference.py @@ -0,0 +1,213 @@ +import os.path +import pickle +from multiprocessing.dummy import Process, Manager, Queue +import cv2 +import time +from options.hifi_test_options import HifiTestOptions +from face_feature.hifi_image_api import HifiImage + +# close onnxruntime warning +import onnxruntime +onnxruntime.set_default_logger_severity(3) + + +class GenInput(Process): + def __init__(self, feature_src_list_, frame_queue_in_, frame_queue_out_, video_cap, src_img_path): + super().__init__() + self.frame_queue_in = frame_queue_in_ + self.frame_queue_out = frame_queue_out_ + self.feature_src_list = feature_src_list_ + self.src_img_path = src_img_path + self.video_cap = video_cap + self.hi = HifiImage(crop_size=256) + + def run(self): + src_latent, crop_face = self.hi.get_face_feature(self.src_img_path) + human_feature = [src_latent, crop_face] + self.feature_src_list.append([human_feature]) + + count = index = 0 + while True: + # import numpy as np + # frame = np.zeros((1080, 1920, 3), dtype=np.uint8) + have_frame, frame = self.video_cap.read() + if not have_frame: + self.frame_queue_in.put(None) + print("no more frame") + # video.release() + break + # print(frame.shape) + self.frame_queue_in.put(frame) + + + +def save_video_ffmpeg(video_path, swap_video_path, model_name=''): + video_name = os.path.basename(video_path).split('.')[-2] + # audio_file_path = os.path.join(video_dir, video_name + '.wav') + audio_file_path = video_path.split('.')[-2] + '.wav' + if not os.path.exists(audio_file_path): + print('extract audio') + os.system( + 'ffmpeg -y -hide_banner -loglevel error -i "' + + str(video_path) + + '" -f wav -vn "' + + str(audio_file_path) + + '"' + ) + else: + print('audio file exist') + if os.path.exists(audio_file_path): + os.rename(swap_video_path, swap_video_path.replace('.mp4', '_no_audio.mp4')) + print('add audio') + # start = time.time() + os.system( + 'ffmpeg -y -hide_banner -loglevel error -i "' + + str(swap_video_path.replace('.mp4', '_no_audio.mp4')) + + '" -i "' + + str(audio_file_path) + # + '" -c:v copy "' + + '" -c:v libx264 "' + + '"-c:a aac -b:v 40000k "' + + str(swap_video_path) + + '"' + ) + # print('add audio time cost', time.time() - start) + # print('remove temp') + os.remove(swap_video_path.replace('.mp4', '_no_audio.mp4')) + if model_name != '': + os.rename(swap_video_path, swap_video_path.replace('.mp4', '_%s.mp4' % model_name)) + os.remove(audio_file_path) + +def chang_video_resolution(video_path, resize_video_path): + print('change video resolution to 1080p') + os.system( + 'ffmpeg -y -hide_banner -loglevel error -i "' + + str(video_path) + + '" -vf scale=1080:-1 -c:v libx264 -c:a aac -b:v 20000k "' + + str(resize_video_path) + + '"' + ) + + +class GetOutput(Process): + def __init__(self, frame_queue_out_, src_video_path, model_name, out_dir, video_fps, video_size, video_frame_count, image_name, + align_method, use_gfpgan, sr_weight, use_color_trans=False, color_trans_mode='rct'): + # def __init__(self, frame_queue_out_, src_video_path, model_name, out_dir, video_info): + super().__init__() + self.frame_queue_out 
= frame_queue_out_ + self.src_video_path = src_video_path + out_video_name = image_name + '_to_' + os.path.basename(src_video_path).split('.')[-2] + '_' + model_name + '_' + align_method + '.mp4' + if use_gfpgan: + out_video_name = out_video_name.replace('.mp4', '_sr_{}.mp4'.format(sr_weight)) + if use_color_trans: + out_video_name = out_video_name.replace('.mp4', '_'+color_trans_mode+'.mp4') + self.out_path = os.path.join(out_dir, out_video_name) + # self.video_info = video_info + print(self.out_path) + self.videoWriter = cv2.VideoWriter(self.out_path, cv2.VideoWriter_fourcc(*'mp4v'), video_fps, video_size) + self.video_frame_count = video_frame_count + # self.model_name = model_name + + + + def run(self): + # import time + count = 0 + fps_count = 0 + + start_time = time.time() + while True: + queue_out = self.frame_queue_out.get() + frame_out = queue_out + # print("out:", type(queue_out)) + fps_count += 1 + + if fps_count % 100 == 0: + end_time = time.time() + print('fps: {}'.format(fps_count / (end_time - start_time))) + start_time = time.time() + fps_count = 0 + count += 1 + if count % self.video_frame_count == 0: + break + self.videoWriter.write(frame_out) + self.videoWriter.release() + start_time = time.time() + save_video_ffmpeg(self.src_video_path, self.out_path) + print("add audio cost:", time.time() - start_time) + + + +class FaceSwap(Process): + def __init__(self, feature_src_list_, frame_queue_in_, + frame_queue_out_, model_name='', align_method='68', use_gfpgan=True, sr_weight=1.0,color_trans_mode='rct'): + super().__init__() + from HifiFaceAPI_parallel_trt_roi_realtime_sr_api import HifiFaceRealTime + self.hfrt = HifiFaceRealTime(feature_src_list_, frame_queue_in_, + frame_queue_out_, model_name=model_name, align_method=align_method, + use_gfpgan=use_gfpgan, sr_weight=sr_weight, use_color_trans=False, color_trans_mode=color_trans_mode) + def run(self): + self.hfrt.forward() + + +if __name__ == '__main__': + frame_queue_in = Queue(2) + frame_queue_out = Queue(2) + manager = Manager() + image_feature_src_list = manager.list() + opt = HifiTestOptions().parse() + + model_name = opt.model_name + align_method = opt.align_method + use_gfpgan = opt.use_gfpgan + sr_weight = opt.sr_weight + use_color_trans = opt.use_color_trans + color_trans_mode = opt.color_trans_mode + print("use_gfpgan:", use_gfpgan, "use use_color_trans:", use_color_trans) + + src_img_path = opt.src_img_path + image_name = src_img_path.split('/')[-1].split('.')[0] + video_path = opt.video_path + print(video_path) + video_name = video_path.split('/')[-1].split('.')[0] + output_dir = opt.output_dir + output_dir = os.path.join(output_dir, video_name) + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + video = cv2.VideoCapture(video_path) + video_fps = video.get(cv2.CAP_PROP_FPS) + video_size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))) + video_frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + + print("ori_video_size:", video_size) + if video_size != (1080, 1920) and opt.video_to_1080p: + resize_video_path = video_path.replace('.mp4', '_1080p.mp4') + if not os.path.exists(resize_video_path): + chang_video_resolution(video_path, resize_video_path) + video_path = resize_video_path + # video_size = (1080, 1920) + + t1 = time.time() + gi = GenInput(image_feature_src_list, frame_queue_in, frame_queue_out, video, src_img_path) + + go = GetOutput(frame_queue_out, video_path, model_name, output_dir, video_fps, video_size, video_frame_count, 
image_name, + align_method, use_gfpgan, sr_weight, use_color_trans, color_trans_mode) + + fs = FaceSwap(image_feature_src_list, frame_queue_in, frame_queue_out, + model_name=model_name, align_method=align_method, use_gfpgan=use_gfpgan, sr_weight=sr_weight, color_trans_mode=color_trans_mode) + + gi.start() + go.start() + fs.start() + + gi.join() + print('gi stop') + go.join() + print('go stop') + fs.join() + print('fs stop') + + video.release() + + print("time cost:", time.time()-t1) diff --git a/inference_cam.py b/inference_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..787c338ed5c8aa885ba40e69910c041899fcbbba --- /dev/null +++ b/inference_cam.py @@ -0,0 +1,114 @@ +import pickle + +from multiprocessing.dummy import Process, Manager, Queue +import cv2 +import numpy as np +# close onnxruntime warning +import onnxruntime +onnxruntime.set_default_logger_severity(3) + + +class GenInput(Process): + def __init__(self, feature_src_list_, frame_queue_in_, frame_queue_out_): + super().__init__() + self.frame_queue_in = frame_queue_in_ + self.frame_queue_out = frame_queue_out_ + self.feature_src_list = feature_src_list_ + + def run(self): + with open('data/image_feature_dict.pkl', 'rb') as f: + image_feature_src_dict = pickle.load(f) + + print(len(image_feature_src_dict)) + self.feature_src_list.append([image_feature_src_dict['1']]) + + cap = cv2.VideoCapture(0) # 640 480 1280 720 1920 1080 + cap.set(3, 1920) + cap.set(4, 1080) + print(cv2.CAP_PROP_FOURCC, cv2.CAP_PROP_FPS) + cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')) + cap.set(cv2.CAP_PROP_FPS, 30) + print('CAP_PROP_FPS',cap.get(cv2.CAP_PROP_FPS)) + + count = index = 0 + while True: + _, frame = cap.read() + self.frame_queue_in.put(frame) + + count += 1 + if count % 500 == 0: + self.feature_src_list.append([image_feature_src_dict['{}'.format(1 + index)], + image_feature_src_dict['{}'.format(10 + index)]]) + print('change src face') + index += 1 + if count % 5000 == 0: + # exit condition + self.frame_queue_in.put(None) + break + + +class GetOutput(Process): + def __init__(self, frame_queue_out_): + super().__init__() + self.frame_queue_out = frame_queue_out_ + + def run(self): + import time + count = 0 + fps_count = 0 + + start_time = time.time() + while True: + queue_out = self.frame_queue_out.get() + # print(queue_out) + frame_out = queue_out[0] + face_detect_flag = queue_out[1] + # print(face_detect_flag) + fps_count += 1 + + if fps_count % 300 == 0: + end_time = time.time() + print('fps: {}'.format(fps_count / (end_time - start_time))) + start_time = time.time() + fps_count = 0 + count += 1 + if count % 2500 == 0: + break + cv2.imshow('output', frame_out) + cv2.waitKey(1) + + +class FaceSwap(Process): + def __init__(self, feature_src_list_, frame_queue_in_, + frame_queue_out_, model_name=''): + super().__init__() + from HifiFaceAPI_parallel_trt_roi_realtime_api import HifiFaceRealTime + self.hfrt = HifiFaceRealTime(feature_src_list_, frame_queue_in_, + frame_queue_out_, model_name=model_name) + + def run(self): + self.hfrt.forward() + + +if __name__ == '__main__': + frame_queue_in = Queue(2) + frame_queue_out = Queue(2) + manager = Manager() + image_feature_src_list = manager.list() + + gi = GenInput(image_feature_src_list, frame_queue_in, frame_queue_out) + go = GetOutput(frame_queue_out) + fs = FaceSwap(image_feature_src_list, frame_queue_in, frame_queue_out, model_name='er8_bs1') + + gi.start() + go.start() + fs.start() + + gi.join() + print('gi stop') + go.join() + print('go stop') + fs.join() 
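+ # all three workers have been joined at this point: gi (webcam capture), go (display) and fs (the swap pipeline); the Queue(2) buffers above bound each stage to at most two frames in flight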
+ print('fs stop') + + print('final stop') diff --git a/model_lib/__init__.py b/model_lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c8223f4ba295fe8974e978f67af58316326b29a9 --- /dev/null +++ b/model_lib/__init__.py @@ -0,0 +1,3 @@ +from .base_wrapper import ONNXModel +from .model_base import ModelBase + diff --git a/model_lib/base_wrapper/__init__.py b/model_lib/base_wrapper/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..740dcd237cdc3dca8ce5d188c48c1d61b3b21832 --- /dev/null +++ b/model_lib/base_wrapper/__init__.py @@ -0,0 +1,6 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/26 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +from .onnx_model import ONNXModel +from .onnx_model_picklable import OnnxModelPickable diff --git a/model_lib/base_wrapper/onnx_model.py b/model_lib/base_wrapper/onnx_model.py new file mode 100644 index 0000000000000000000000000000000000000000..a7ea743ba10711b3b6a403501966d48888c049b2 --- /dev/null +++ b/model_lib/base_wrapper/onnx_model.py @@ -0,0 +1,166 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/29 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +""" +todo: io_binding https://onnxruntime.ai/docs/api/python/api_summary.html +""" +import os + +import onnxruntime +import numpy as np +from cv2box import MyFpsCounter +import re +def get_output_info(onnx_session): + output_name = [] + output_shape = [] + for node in onnx_session.get_outputs(): + output_name.append(node.name) + output_shape.append(node.shape) + return output_name, output_shape + + +def get_input_info(onnx_session): + input_name = [] + input_shape = [] + for node in onnx_session.get_inputs(): + input_name.append(node.name) + input_shape.append(node.shape) + return input_name, input_shape + + +def get_input_feed(input_name, image_tensor): + """ + Args: + input_name: + image_tensor: [image tensor, ...] 
+ Returns: + """ + input_feed = {} + for index, name in enumerate(input_name): + input_feed[name] = image_tensor[index] + return input_feed + + +class ONNXModel: + def __init__(self, onnx_path, provider='gpu', debug=False, input_dynamic_shape=None, model_name=''): + self.provider = provider + trt_cache_path = './cache/' + str(self.provider) + '/' + str(model_name) + if self.provider == 'gpu': + self.providers = ( + "CUDAExecutionProvider", + {'device_id': 0, } + ) + elif self.provider == 'trt': + os.makedirs(trt_cache_path, exist_ok=True) + self.providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_engine_cache_path': trt_cache_path, 'trt_fp16_enable': False, } + ) + elif self.provider == 'trt16': + os.makedirs(trt_cache_path, exist_ok=True) + self.providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_engine_cache_path': trt_cache_path, 'trt_fp16_enable': True, + 'trt_dla_enable': False} + ) + elif self.provider == 'trt8': + os.makedirs(trt_cache_path, exist_ok=True) + self.providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_int8_enable': 1, } + ) + else: + self.providers = "CPUExecutionProvider" + #onnxruntime.set_default_logger_severity(2) + session_options = onnxruntime.SessionOptions() + session_options.log_severity_level = 3 + try: + self.onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[self.providers]) + except Exception as e: + if type(e.args[0])==str and 'TensorRT EP could not deserialize engine from cache' in e.args[0]: + res = re.match('.*TensorRT EP could not deserialize engine from cache: (.*)', e.args[0]) + os.remove(res.group(1)) + print('waiting generate new model...') + self.onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[self.providers]) + else: + raise e + print(model_name,self.onnx_session.get_providers()) + if 'trt' in self.provider: + assert 'Tensorrt' in self.onnx_session.get_providers()[0], 'Tensorrt start failure' + # sessionOptions.intra_op_num_threads = 3 + self.input_name, self.input_shape = get_input_info(self.onnx_session) + self.output_name, self.output_shape = get_output_info(self.onnx_session) + + self.input_dynamic_shape = input_dynamic_shape + + if self.input_dynamic_shape is not None: + self.input_dynamic_shape = self.input_dynamic_shape if isinstance(self.input_dynamic_shape, list) else [ + self.input_dynamic_shape] + + if debug: + print('onnx version: {}'.format(onnxruntime.__version__)) + print("input_name:{}, shape:{}".format(self.input_name, self.input_shape)) + print("output_name:{}, shape:{}".format(self.output_name, self.output_shape)) + + self.warm_up() + self.speed_test() + self.speed_test() + def warm_up(self): + if not self.input_dynamic_shape: + try: + self.forward([np.random.rand(*([1]+self.input_shape[i][1:])).astype(np.float32) + for i in range(len(self.input_shape))]) + except TypeError: + print('Model may be dynamic, plz name the \'input_dynamic_shape\' !') + else: + self.forward([np.random.rand(*self.input_dynamic_shape[i]).astype(np.float32) + for i in range(len(self.input_shape))]) + print('Model warm up done !') + + def speed_test(self): + if not self.input_dynamic_shape: + input_tensor = [np.random.rand(*([1]+self.input_shape[i][1:])).astype(np.float32) + for i in range(len(self.input_shape))] + else: + input_tensor = [np.random.rand(*self.input_dynamic_shape[i]).astype(np.float32) + for i in range(len(self.input_shape))] + + with MyFpsCounter('[{}] onnx 10 
times'.format(self.provider)) as mfc: + for i in range(10): + _ = self.forward(input_tensor) + + def forward(self, image_tensor_in, trans=False): + """ + Args: + image_tensor_in: image_tensor [image_tensor] [image_tensor_1, image_tensor_2] + trans: apply trans for image_tensor or first image_tensor(list) + Returns: + model output + """ + if not isinstance(image_tensor_in, list) or len(image_tensor_in) == 1: + image_tensor_in = image_tensor_in[0] if isinstance(image_tensor_in, list) else image_tensor_in + if trans: + image_tensor_in = image_tensor_in.transpose(2, 0, 1)[np.newaxis, :] + image_tensor_in = [np.ascontiguousarray(image_tensor_in)] + else: + # for multi input, only trans first tensor + if trans: + image_tensor_in[0] = image_tensor_in[0].transpose(2, 0, 1)[np.newaxis, :] + image_tensor_in = [np.ascontiguousarray(image_tensor) for image_tensor in image_tensor_in] + + input_feed = get_input_feed(self.input_name, image_tensor_in) + temp_result = self.onnx_session.run(self.output_name, input_feed=input_feed) + if len(temp_result)==1: + return temp_result + else: + while np.any(np.isnan(temp_result[0])) or np.any(np.isnan(temp_result[1])): + temp_result = self.onnx_session.run(self.output_name, input_feed=input_feed) + return temp_result + + def batch_forward(self, bach_image_tensor, trans=False): + if trans: + bach_image_tensor = bach_image_tensor.transpose(0, 3, 1, 2) + input_feed = get_input_feed(self.input_name, bach_image_tensor) + return self.onnx_session.run(self.output_name, input_feed=input_feed) diff --git a/model_lib/base_wrapper/onnx_model_picklable.py b/model_lib/base_wrapper/onnx_model_picklable.py new file mode 100644 index 0000000000000000000000000000000000000000..0a15a65d87e570b5b1701554fabdfa8bb772da38 --- /dev/null +++ b/model_lib/base_wrapper/onnx_model_picklable.py @@ -0,0 +1,116 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/29 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power +# -*-coding: utf-8 -*- + +import onnxruntime +import numpy as np +from cv2box import MyFpsCounter + + +def init_session(onnx_path, provider='gpu'): + if provider == 'gpu': + providers = ( + "CUDAExecutionProvider", + {'device_id': 0, } + ) + elif provider == 'trt': + providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_fp16_enable': False, } + ) + elif provider == 'trt16': + providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_fp16_enable': True, } + ) + elif provider == 'trt8': + providers = ( + 'TensorrtExecutionProvider', + {'trt_engine_cache_enable': True, 'trt_int8_enable': True, } + ) + else: + providers = "CPUExecutionProvider" + + # onnxruntime.set_default_logger_severity(3) + session_options = onnxruntime.SessionOptions() + session_options.log_severity_level = 3 + onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[providers]) + return onnx_session + + +class OnnxModelPickable: # This is a wrapper to make the current InferenceSession class pickable. 
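+ # Only onnx_path and provider are carried across pickling (see __getstate__/__setstate__ below); the InferenceSession itself is not picklable, so __setstate__ rebuilds it with init_session() once the object reaches the worker process.
+ # Note that forward() uses self.onnx_session directly, so it assumes the wrapper has already been unpickled (or the session was set by hand).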
+ def __init__(self, onnx_path, provider='gpu'): + self.onnx_path = onnx_path + self.provider = provider + # self.onnx_session = init_session(self.onnx_path, self.provider) + + def get_input_feed(self, input_name, image_tensor): + """ + input_feed={self.input_name: image_tensor} + :param input_name: + :param image_tensor: + :return: + """ + input_feed = {} + for name in input_name: + input_feed[name] = image_tensor + return input_feed + + def get_output_info(self, onnx_session): + """ + output_name = onnx_session.get_outputs()[0].name + :param onnx_session: + :return: + """ + output_name = [] + output_shape = [] + for node in onnx_session.get_outputs(): + output_name.append(node.name) + output_shape.append(node.shape) + return output_name, output_shape + + def get_input_info(self, onnx_session): + """ + input_name = onnx_session.get_inputs()[0].name + :param onnx_session: + :return: + """ + input_name = [] + input_shape = [] + for node in onnx_session.get_inputs(): + input_name.append(node.name) + input_shape.append(node.shape) + return input_name, input_shape + + def forward(self, image_tensor, trans=False): + ''' + image_tensor = image.transpose(2, 0, 1) + image_tensor = image_tensor[np.newaxis, :] + onnx_session.run([output_name], {input_name: x}) + :param image_tensor: + :return: + ''' + # the input data type must match the model; all three of the following call styles work + # scores, boxes = self.onnx_session.run(None, {self.input_name: image_tensor}) + # scores, boxes = self.onnx_session.run(self.output_name, input_feed={self.input_name: image_tensor}) + if trans: + image_tensor = image_tensor.transpose(2, 0, 1) + image_tensor = image_tensor[np.newaxis, :] + image_tensor = np.ascontiguousarray(image_tensor) + input_name, _ = self.get_input_info(self.onnx_session) + output_name, _ = self.get_output_info(self.onnx_session) + input_feed = self.get_input_feed(input_name, image_tensor) + return self.onnx_session.run(output_name, input_feed=input_feed) + + def __getstate__(self): + return { + 'onnx_path': self.onnx_path, + 'provider': self.provider, + } + + def __setstate__(self, values): + self.onnx_path = values['onnx_path'] + self.provider = values['provider'] + self.onnx_session = init_session(self.onnx_path, self.provider) diff --git a/model_lib/model_base.py b/model_lib/model_base.py new file mode 100644 index 0000000000000000000000000000000000000000..346a6a6f2cb00cf5ce9c1888783014ef2b3f1be2 --- /dev/null +++ b/model_lib/model_base.py @@ -0,0 +1,43 @@ +# -- coding: utf-8 -- +# @Time : 2022/7/29 +# @Author : ykk648 +# @Project : https://github.com/ykk648/AI_power + +from .base_wrapper import ONNXModel, OnnxModelPickable +from pathlib import Path +import torch + +class ModelBase: + def __init__(self, model_info, provider): + self.model_path = model_info['model_path'] + + if 'input_dynamic_shape' in model_info.keys(): + self.input_dynamic_shape = model_info['input_dynamic_shape'] + else: + self.input_dynamic_shape = None + + if 'picklable' in model_info.keys(): + picklable = model_info['picklable'] + else: + picklable = False + + if 'trt_wrapper_self' in model_info.keys(): + TRTWrapper = TRTWrapperSelf + + # init model + if Path(self.model_path).suffix == '.engine': + self.model_type = 'trt' + self.model = TRTWrapper(self.model_path) + elif Path(self.model_path).suffix == '.tjm': + self.model_type = 'tjm' + self.model = torch.jit.load(self.model_path) + self.model.eval() + elif Path(self.model_path).suffix in ['.onnx', '.bin']: + self.model_type = 'onnx' + model_name = self.model_path.split('/')[-1].split('.')[0].split('_')[0] + if not 
picklable: + self.model = ONNXModel(self.model_path, provider=provider, input_dynamic_shape=self.input_dynamic_shape, model_name=model_name) + else: + self.model = OnnxModelPickable(self.model_path, provider=provider, ) + else: + raise ValueError('check model suffix, support engine/tjm/onnx now.') diff --git a/options/hifi_test_options.py b/options/hifi_test_options.py new file mode 100644 index 0000000000000000000000000000000000000000..54e6e83589fa72f5a878d8595402a3bef41ea6a3 --- /dev/null +++ b/options/hifi_test_options.py @@ -0,0 +1,41 @@ +import argparse + + +class HifiTestOptions: + def __init__(self): + self.parser = argparse.ArgumentParser() + self.initialized = False + + def initialize(self): + self.parser.add_argument('--model_name', type=str, default='er8_bs1', help='er8_bs1') + self.parser.add_argument('--input_size', type=int, default='256') + self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') + + self.parser.add_argument('--src_img_path', type=str, default='data/source/elon-musk1.jpg') + self.parser.add_argument('--video_path', type=str, default='data/source/demo.mp4') + self.parser.add_argument('--video_to_1080p', action="store_true", help='change video resolution to 1080p') + self.parser.add_argument('--mode', type=str, default='default', help='default merge') + self.parser.add_argument('--align_method', type=str, default='68', help='face align method:68 5class') + + self.parser.add_argument('--use_gfpgan', action="store_true", help='use gfpgan for sr or not') + self.parser.add_argument('--sr_weight', type=float, default=1.0) + + self.parser.add_argument('--use_color_trans', action="store_true", help='use color transfer or not') + self.parser.add_argument('--color_trans_mode', type=str, default='rct', help='rct lct mkl idt sot') + + self.parser.add_argument('--output_dir', type=str, default='data/output') + + + def parse(self, save=True): + if not self.initialized: + self.initialize() + self.opt = self.parser.parse_args() + + str_ids = self.opt.gpu_ids.split(',') + self.opt.gpu_ids = [] + for str_id in str_ids: + id = int(str_id) + if id >= 0: + self.opt.gpu_ids.append(id) + + return self.opt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e563bfd201362e120e280c1b357e8f6986f014b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +cv2box==0.4.9 +gradio==4.41.0 +numexpr==2.8.3 +numpy==1.23.5 +onnxruntime==1.14.1 +onnxruntime_gpu==1.16.3 +opencv_contrib_python==4.7.0.72 +opencv_python==4.7.0.68 +opencv_python_headless==4.7.0.72 +openpyxl==3.0.10 +pycryptodome==3.20.0 +scikit_image==0.18.0 +scipy==1.9.1 +xlrd==2.0.1 \ No newline at end of file
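For reference, inference.py drives the offline video pipeline with the options declared in options/hifi_test_options.py above; using the repository defaults, a run might look like:

python inference.py --src_img_path data/source/elon-musk1.jpg --video_path data/source/demo.mp4 --output_dir data/output --use_gfpgan

The realtime webcam demo is started with python inference_cam.py; it takes no command-line flags, reads data/image_feature_dict.pkl, opens the camera at index 0 and uses the hardcoded er8_bs1 model. Only the flags defined in HifiTestOptions are assumed here; anything else would have to be added to the parser first.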