# MuseVSpace / MuseV / MMCM / mmcm / vision / human / insightface_predictor.py
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 8 13:31:25 2020
@author: infguo
"""
import os
from typing import Dict
import argparse
import json
import traceback
import hashlib
import cv2
from moviepy.editor import VideoFileClip
# TODO
# Compared with the original code, this modifies insight_face's output
# interface to pass the gender score through, so that downstream decisions
# can be made more accurately.
def inference(frame, app, max_face=10):
    """Run face detection/recognition on a single frame.

    Args:
        frame: image array in the layout expected by ``app.get`` (the caller
            converts to BGR before calling — see ``predict``).
        app: an insightface ``FaceAnalysis``-like object exposing
            ``get(frame, max_num=...)``.
        max_face: maximum number of faces to return; forwarded as insightface's
            ``max_num`` (0 means no limit per that convention — TODO confirm).

    Returns:
        The list of detected faces, or ``None`` when detection raised an
        exception or found no faces.
    """
    try:  # Best-effort: a frame that crashes the detector is skipped, not fatal.
        faces = app.get(frame, max_num=max_face)
    except Exception as e:
        print("is discarded due to exception {}!".format(e))
        return None
    # If no landmarks/faces were detected, the frame is discarded.
    if not faces:
        return None
    return faces
def predict(app, video_map, video_path=None, sample_fps=None, **_compat):
    """Detect faces on sampled frames of a video and record them in ``video_map``.

    Args:
        app: insightface ``FaceAnalysis``-like detector passed to ``inference``.
        video_map: per-video metadata dict; must contain
            ``video_file_hash_code``, ``content_box`` and ``sample_fps``.
            Mutated in place: ``detect_fps`` and ``face_detections`` are added.
        video_path: path of the video file on disk.
        sample_fps: target detection rate (frames per second) to sample at.

    Returns:
        The same ``video_map`` dict, augmented with the detection results.

    Raises:
        ValueError: if the file's md5 does not match ``video_file_hash_code``.
    """
    # Integrity check: hash the file in chunks so large videos are never
    # read into memory all at once.
    md5 = hashlib.md5()
    with open(video_path, "rb") as fd:
        for chunk in iter(lambda: fd.read(1 << 20), b""):
            md5.update(chunk)
    video_hash_code = md5.hexdigest()
    # Explicit raise instead of `assert`: asserts are stripped under `-O`.
    if video_hash_code != video_map["video_file_hash_code"]:
        raise ValueError(
            "md5 mismatch for {}: {} != {}".format(
                video_path, video_hash_code, video_map["video_file_hash_code"]
            )
        )
    # Capture video and restrict it to the declared content box.
    video = VideoFileClip(video_path)
    video = video.crop(*video_map["content_box"])
    fps = video.fps
    duration = video.duration
    total_frames = int(duration * fps)
    width, height = video.size
    print("fps, frame_count, width, height:", fps, total_frames, width, height)
    video_map["detect_fps"] = sample_fps
    video_map["face_detections"] = []
    cnt_frame, step = 0, 0
    # NOTE(review): iteration rate is taken from the map's "sample_fps" key,
    # not the `sample_fps` argument — presumably equal; confirm with callers.
    fps = video_map["sample_fps"]
    for frame in video.iter_frames(fps=fps):
        if cnt_frame >= step:
            step += fps / sample_fps
            # moviepy yields RGB; insightface expects BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # max_face=0 -> no cap on the number of returned faces.
            faces = inference(frame, app, max_face=0)
            if faces:
                for f in faces:
                    # Convert numpy payloads to JSON-serializable types.
                    f["bbox"] = f["bbox"].tolist()
                    f["kps"] = f["kps"].tolist()
                    f["embedding"] = f["embedding"].tolist()
                    f["det_score"] = str(f["det_score"])
                    f["gender"] = str(f["gender"])
                    f["age"] = str(f["age"])
            else:
                faces = None
            video_map["face_detections"].append(
                {"frame_idx": cnt_frame, "faces": faces}
            )
        cnt_frame += 1
    return video_map
class InsightfacePredictor(object):
    """Video face predictor backed by insightface's ``FaceAnalysis``.

    Samples frames from a video at ``sample_fps`` and stores per-frame face
    detections (bbox, keypoints, embedding, det_score, gender, age) into the
    provided ``video_map`` dict via :func:`predict`.
    """

    def __init__(
        self,
        sample_fps=10,
    ) -> None:
        # BUG FIX: FaceAnalysis was referenced here but only imported inside
        # predict(), so constructing this class raised NameError. Import
        # locally (keeping the file importable without insightface installed).
        from insightface.app import FaceAnalysis

        # Load models.
        self.sample_fps = sample_fps
        self.app = FaceAnalysis(
            allowed_modules=["detection", "genderage", "recognition"],
            providers=["CUDAExecutionProvider"],
            provider_options=[{"device_id": "0"}],
        )
        self.app.prepare(ctx_id=0, det_thresh=0.3, det_size=(640, 640))

    def __call__(self, video_path, video_map) -> Dict:
        # BUG FIX: predict() requires the detector app as its first argument;
        # the original call omitted it, causing a TypeError at runtime.
        video_info = predict(
            self.app, video_path, video_map, sample_fps=self.sample_fps
        )
        return video_info