import subprocess

# Compile the 3DDFA_V2 (tddfa) native extensions before they are imported below.
subprocess.run(["sh", "tddfa/build.sh"])

import gradio as gr

import cv2 as cv
import torch
from torchvision import transforms
from DeePixBiS.Model import DeePixBiS

import yaml
import numpy as np

from tddfa.utils.depth import depth
from tddfa.TDDFA import TDDFA

from DSDG.DUM.models.CDCNs_u import Conv2d_cd, CDCN_u

import os

# Avoid crashes from duplicate OpenMP runtimes (PyTorch + OpenCV) and cap
# OpenMP threads for CPU inference.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
os.environ['OMP_NUM_THREADS'] = '4'

device = torch.device("cpu")
labels = ['Live', 'Spoof']
# Decision thresholds: mean DeePixBiS liveness-map activation and normalized
# DSDG depth-map score, above which a face is classified as real.
pix_threshold = 0.45
dsdg_threshold = 0.003
examples = [
    ['examples/1_1_21_2_33_scene_fake.jpg', "DeePixBiS"],
    ['examples/frame150_real.jpg', "DeePixBiS"],
    ['examples/1_2.avi_125_real.jpg', "DeePixBiS"],
    ['examples/1_3.avi_25_fake.jpg', "DeePixBiS"]]

faceClassifier = cv.CascadeClassifier('./DeePixBiS/Classifiers/haarface.xml')

tfms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# DeePixBiS: pixel-wise binary supervision anti-spoofing model. Renamed from
# `model` so it is not clobbered by the CDCN_u model loaded below.
deepix_model = DeePixBiS(pretrained=False)
deepix_model.load_state_dict(torch.load('./DeePixBiS/DeePixBiS.pth'))
deepix_model.eval()
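
# Illustrative sketch (shapes assumed from the DeePixBiS paper, not verified
# against this checkpoint): the model returns a low-resolution pixel-wise
# liveness map plus a binary score, and inference() thresholds the map's mean.
#   mask, binary = deepix_model(batch)   # batch: (N, 3, 224, 224)
#   liveness = torch.mean(mask).item()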

# 3DDFA_V2 reconstructs a dense 3D face mesh, from which a facial depth map
# is rendered for the DSDG branch.
depth_config_path = 'tddfa/configs/mb05_120x120.yml'
with open(depth_config_path) as f:
    cfg = yaml.safe_load(f)
tddfa = TDDFA(gpu_mode=False, **cfg)

# CDCN_u from DSDG/DUM (central difference convolutions, theta=0.7). The Adam
# optimizer from the original training script is dropped: this app only runs
# inference.
cdcn_model = CDCN_u(basic_conv=Conv2d_cd, theta=0.7)
cdcn_model = cdcn_model.to(device)
weights = torch.load('./DSDG/DUM/checkpoint/CDCN_U_P1_updated.pkl', map_location=device)
cdcn_model.load_state_dict(weights)
cdcn_model.eval()


class Normalization_valtest(object):
    """Normalize into [-1, 1], same as mxnet: image = (image - 127.5) / 128."""

    def __call__(self, image_x):
        image_x = (image_x - 127.5) / 128
        return image_x
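
# For example: pixel values 0, 127.5, and 255 map to roughly -0.996, 0.0, and
# 0.996 respectively, so uint8 images land just inside [-1, 1].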


def prepare_data(images, boxes, depths):
    """Crop each face and its depth map, resize, and normalize for CDCN_u."""
    transform = transforms.Compose([Normalization_valtest()])
    files_total = 1
    image_x = np.zeros((files_total, 256, 256, 3))
    depth_x = np.ones((files_total, 32, 32))

    for i, (image, bbox, depth_img) in enumerate(
            zip(images, boxes, depths)):
        x, y, w, h = bbox
        depth_img = cv.cvtColor(depth_img, cv.COLOR_RGB2GRAY)
        image = image[y:y + h, x:x + w]
        depth_img = depth_img[y:y + h, x:x + w]

        image_x[i, :, :, :] = cv.resize(image, (256, 256))
        depth_x[i, :, :] = cv.resize(depth_img, (32, 32))

    image_x = image_x.transpose((0, 3, 1, 2))  # NHWC -> NCHW
    image_x = transform(image_x)
    image_x = torch.from_numpy(image_x.astype(float)).float()
    depth_x = torch.from_numpy(depth_x.astype(float)).float()
    return image_x, depth_x
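
# Shape sketch (illustrative only; a hypothetical 480x640 frame and Haar box):
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   ix, dx = prepare_data([frame], [(100, 80, 200, 200)], [frame.copy()])
#   # ix.shape == (1, 3, 256, 256), dx.shape == (1, 32, 32)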


def find_largest_face(faces):
    """Return the (x, y, w, h) detection with the largest area, or None."""
    largest_face = None
    largest_area = 0

    for (x, y, w, h) in faces:
        area = w * h
        if area > largest_area:
            largest_area = area
            largest_face = (x, y, w, h)
    return largest_face


def inference(img, model_name):
    confidences = {}
    grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    faces = faceClassifier.detectMultiScale(
        grey, scaleFactor=1.1, minNeighbors=4)
    face = find_largest_face(faces)

    if face is not None:
        x, y, w, h = face
        faceRegion = img[y:y + h, x:x + w]
        faceRegion = cv.cvtColor(faceRegion, cv.COLOR_BGR2RGB)
        faceRegion = tfms(faceRegion)
        faceRegion = faceRegion.unsqueeze(0)

        if model_name == 'DeePixBiS':
            mask, binary = deepix_model(faceRegion)
            res = torch.mean(mask).item()
            cls = 'Real' if res >= pix_threshold else 'Spoof'
            res = 1 - res

        else:
            dense_flag = True
            # TDDFA expects [x_min, y_min, x_max, y_max, score] boxes, so
            # convert the Haar (x, y, w, h) box into corner form.
            box = [x, y, x + w, y + h, 1]
            param_lst, roi_box_lst = tddfa(img, [box])

            ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)
            depth_img = depth(img, ver_lst, tddfa.tri, with_bg_flag=False)
            with torch.no_grad():
                map_score_list = []
                image_x, map_x = prepare_data([img], [list(face)], [depth_img])

                image_x = image_x.unsqueeze(0)
                map_x = map_x.unsqueeze(0)
                inputs = image_x.to(device)
                test_maps = map_x.to(device)

                map_score = 0.0
                for frame_t in range(inputs.shape[1]):
                    mu, logvar, map_x, x_concat, x_Block1, x_Block2, x_Block3, x_input = cdcn_model(
                        inputs[:, frame_t, :, :, :])
                    # Normalize the predicted depth mass by the rendered
                    # ground-truth depth mass.
                    score_norm = torch.sum(mu) / torch.sum(test_maps[:, frame_t, :, :])
                    map_score += score_norm
                map_score = map_score / inputs.shape[1]
                map_score_list.append(map_score)

            res = map_score_list[0].item()
            if res > 10:  # guard against degenerate scores from a bad depth render
                res = 0.0
            cls = 'Real' if res >= dsdg_threshold else 'Spoof'
            res = res * 100

        label = f'{cls} {res:.2f}'
        confidences = {label: res}
        color = (0, 255, 0) if cls == 'Real' else (255, 0, 0)
        cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv.putText(img, label, (x, y + h + 30),
                   cv.FONT_HERSHEY_COMPLEX, 1, color)

    return img, confidences
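
# Standalone usage sketch (hypothetical paths; the Gradio app below feeds
# webcam frames instead):
#   frame = cv.imread('examples/frame150_real.jpg')   # BGR, as the code expects
#   annotated, scores = inference(frame, 'DeePixBiS')
#   cv.imwrite('annotated.jpg', annotated)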


if __name__ == '__main__':
    # Gradio 3.x API: webcam image input plus a model selector; queue up to
    # two concurrent requests. Set share=True to expose a public link.
    demo = gr.Interface(
        fn=inference,
        inputs=[gr.Image(source='webcam', type='numpy'),
                gr.Dropdown(["DeePixBiS", "DSDG"], value="DeePixBiS")],
        outputs=["image", gr.Label(num_top_classes=2)],
        examples=examples).queue(concurrency_count=2)
    demo.launch(share=False)