Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue Sep 8 13:31:25 2020 | |
@author: infguo | |
""" | |
import os | |
from typing import Dict | |
import argparse | |
import json | |
import traceback | |
import hashlib | |
import cv2 | |
from moviepy.editor import VideoFileClip | |
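# TODO
# NOTE: Compared with the upstream source, this part modifies the insight_face
# output interface so that the gender score is passed through, allowing more
# precise decisions downstream.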
def inference(frame, app, max_face=10):
    """Run the face-analysis app on a single frame.

    Args:
        frame: The image passed unchanged to ``app.get``.
        app: Object exposing ``get(frame, max_num=...)``.
        max_face: Forwarded to ``app.get`` as ``max_num``.

    Returns:
        Whatever ``app.get`` returned when it contains at least one entry;
        ``None`` when ``app.get`` raised or returned an empty result.
    """
    try:
        detected = app.get(frame, max_num=max_face)
    except Exception as e:
        # Best effort: report the failure and drop this frame.
        print("is discarded due to exception {}!".format(e))
        return None
    else:
        # An empty result means nothing was detected, so the frame is dropped.
        return None if len(detected) == 0 else detected
def _file_md5(path, chunk_size=1 << 20):
    """Return the hex MD5 digest of the file at *path*, reading it in chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as fd:
        for chunk in iter(lambda: fd.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def predict(app, video_path, video_map, sample_fps):
    """Detect faces on sampled frames of a video and record them in *video_map*.

    The file's MD5 must match ``video_map["video_file_hash_code"]``. The clip
    is cropped with ``video_map["content_box"]``, iterated at
    ``video_map["sample_fps"]``, and roughly ``sample_fps`` frames per second
    of it are passed to ``inference``.

    Args:
        app: Face-analysis object forwarded to ``inference``.
        video_path: Path of the video file to analyse.
        video_map: Dict providing ``"video_file_hash_code"``, ``"content_box"``
            and ``"sample_fps"``. It is mutated in place: ``"detect_fps"`` and
            ``"face_detections"`` are (re)written.
        sample_fps: Target detection rate; stored as ``"detect_fps"``.

    Returns:
        The same ``video_map`` object. ``"face_detections"`` is a list of
        ``{"frame_idx": int, "faces": list | None}`` entries, where ``faces``
        is ``None`` when nothing was detected on that frame.

    Raises:
        AssertionError: If the file's MD5 digest does not match the expected
            hash in ``video_map``.
    """
    # Hash in chunks so large videos are not loaded into memory at once.
    video_hash_code = _file_md5(video_path)
    expected_hash = video_map["video_file_hash_code"]
    if video_hash_code != expected_hash:
        # Raised explicitly instead of using `assert` so the check is not
        # stripped under `python -O`; the exception type is kept for callers.
        raise AssertionError(
            "hash mismatch for {}: got {}, expected {}".format(
                video_path, video_hash_code, expected_hash
            )
        )

    # Capture video
    source = VideoFileClip(video_path)
    try:
        video = source.crop(*video_map["content_box"])
        fps = video.fps
        total_frames = int(video.duration * fps)
        width, height = video.size
        print("fps, frame_count, width, height:", fps, total_frames, width, height)

        video_map["detect_fps"] = sample_fps
        video_map["face_detections"] = []

        # Frames are iterated at the rate recorded in video_map, not at the
        # container's native fps printed above.
        iter_fps = video_map["sample_fps"]
        step = 0
        for cnt_frame, frame in enumerate(video.iter_frames(fps=iter_fps)):
            if cnt_frame < step:
                continue
            # Advance the next frame index to analyse; the float accumulation
            # keeps the long-run rate at sample_fps even for non-integer ratios.
            step += iter_fps / sample_fps

            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # NOTE(review): max_face=0 presumably means "no limit" for the
            # underlying detector — confirm against the insightface API.
            faces = inference(frame, app, max_face=0)
            if faces:
                # Convert array/scalar fields so each face can be serialised.
                for f in faces:
                    f["bbox"] = f["bbox"].tolist()
                    f["kps"] = f["kps"].tolist()
                    f["embedding"] = f["embedding"].tolist()
                    f["det_score"] = str(f["det_score"])
                    f["gender"] = str(f["gender"])
                    f["age"] = str(f["age"])
            else:
                faces = None
            video_map["face_detections"].append(
                {"frame_idx": cnt_frame, "faces": faces}
            )
    finally:
        # Release the underlying reader even if cropping or detection fails.
        source.close()
    return video_map
class InsightfacePredictor(object):
    """Callable that runs face detection on a video via ``predict``.

    The constructor builds an insightface ``FaceAnalysis`` app with the
    detection, genderage and recognition modules on the CUDA execution
    provider (device 0) and keeps it on ``self.app``.
    """

    def __init__(
        self,
        sample_fps=10,
    ) -> None:
        """Load the face-analysis models.

        Args:
            sample_fps: Detection rate forwarded to ``predict`` on each call.
        """
        # Imported here because FaceAnalysis is not available at module level
        # in this file; without this the constructor fails with NameError.
        from insightface.app import FaceAnalysis

        # Load models
        self.sample_fps = sample_fps
        self.app = FaceAnalysis(
            allowed_modules=["detection", "genderage", "recognition"],
            providers=["CUDAExecutionProvider"],
            provider_options=[{"device_id": "0"}],
        )
        self.app.prepare(ctx_id=0, det_thresh=0.3, det_size=(640, 640))

    def __call__(self, video_path, video_map) -> Dict:
        """Run ``predict`` on *video_path* and return the updated *video_map*."""
        # predict() takes the analysis app as its first positional argument.
        video_info = predict(
            self.app, video_path, video_map, sample_fps=self.sample_fps
        )
        return video_info