import argparse
import datetime
import json
import os

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.compat.v1.keras.backend import set_session

from facial_analysis import FacialImageProcessing
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python types."""

    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
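
# Illustrative example: without NpEncoder, json.dumps raises a TypeError on
# NumPy scalars; with it, they serialise as plain numbers:
#   json.dumps({"score": np.float32(0.93)}, cls=NpEncoder)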
def initialize():
    # Optional GPU setup: let TensorFlow allocate GPU memory on demand
    # instead of pre-allocating all of it.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)
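
# If running on a GPU, initialize() would typically be called once at startup,
# before the first load_model() call:
#   initialize()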
def mobilenet_preprocess_input(x, **kwargs):
    # Subtract fixed per-channel ImageNet means (Caffe-style preprocessing),
    # modifying x in place.
    x[..., 0] -= 103.939
    x[..., 1] -= 116.779
    x[..., 2] -= 123.68
    return x
def detect_emotion(frame_bgr):
    # NOTE: the detector and model are re-created on every call; for video
    # processing, consider loading them once and reusing them across frames.
    imgProcessing = FacialImageProcessing(False)
    model = load_model('./models/affectnet_emotions/mobilenet_7.h5')
    # print(model.summary())
    preprocessing_function = mobilenet_preprocess_input
    INPUT_SIZE = (224, 224)
    idx_to_class = {0: 'Anger', 1: 'Disgust', 2: 'Fear',
                    3: 'Happiness', 4: 'Neutral', 5: 'Sadness', 6: 'Surprise'}
    frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    bounding_boxes, points = imgProcessing.detect_faces(frame)
    points = points.T
    detections = {"id": str(datetime.datetime.now()), "faces": []}
    for bbox, p in zip(bounding_boxes, points):
        face_pred = {}
        box = bbox.astype(int)  # np.int was removed in NumPy 1.24
        x1, y1, x2, y2 = box[0:4]
        face_img = frame[y1:y2, x1:x2, :]
        try:
            face_img = cv2.resize(face_img, INPUT_SIZE)
        except cv2.error:
            # Skip degenerate boxes (e.g. zero width or height) instead of
            # aborting the remaining faces in the frame.
            continue
        inp = preprocessing_function(face_img.astype(np.float32))
        inp = np.expand_dims(inp, axis=0)
        scores = model.predict(inp)[0]
        top = int(np.argmax(scores))
        frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 9, 12), 4)
        cv2.putText(frame, f'{idx_to_class[top]} {scores[top]:.2f}',
                    (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        face_pred["face_bbox"] = [x1, y1, x2, y2]
        face_pred["emotion_predicted"] = idx_to_class[top]
        face_pred["scores"] = {idx_to_class[i]: scores[i] for i in range(len(scores))}
        # Append rather than overwrite, so multi-face frames keep every detection.
        detections["faces"].append(face_pred)
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # back to BGR for VideoWriter
    print(detections)
    return frame, detections
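
# Minimal single-image sketch (illustrative; assumes the model file above
# exists and 'face.jpg' contains a visible face):
#   img = cv2.imread('face.jpg')              # OpenCV reads in BGR order
#   annotated, dets = detect_emotion(img)
#   print(dets["faces"][0]["emotion_predicted"])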
def process_video(video):
    basename = os.path.basename(video)
    name_only = os.path.splitext(basename)[0]
    video_outputpath = os.path.join('./output', basename)
    json_outputpath = os.path.join('./output', name_only + '.json')
    videocap = cv2.VideoCapture(video)
    ret, frame = videocap.read()
    if not ret:
        raise IOError(f'Could not read any frames from {video}')
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    # Use the source frame rate when available; fall back to 24 fps.
    fps = videocap.get(cv2.CAP_PROP_FPS) or 24.0
    size = (frame.shape[1], frame.shape[0])
    out = cv2.VideoWriter(video_outputpath, fourcc, fps, size)
    max_frame = 500  # cap on the number of frames processed
    cnt = 0
    all_detections = []
    while ret and cnt < max_frame:
        processed_frame, detections = detect_emotion(frame)
        all_detections.append(detections)
        cv2.imshow('img', np.array(processed_frame, dtype=np.uint8))
        out.write(processed_frame)
        ret, frame = videocap.read()
        cv2.waitKey(1)
        cnt += 1
    # Dump the per-frame detections once, as a single valid JSON array; writing
    # one object per frame would produce concatenated, unparsable JSON.
    with open(json_outputpath, "w") as jsonfile:
        json.dump(all_detections, jsonfile, indent=4, cls=NpEncoder)
    videocap.release()
    out.release()
    cv2.destroyAllWindows()
    return video_outputpath
def main():
    parser = argparse.ArgumentParser(description='Analysis of Video')
    parser.add_argument(
        '-v', '--video', help='Video to be analysed', required=True)
    args = parser.parse_args()
    process_video(args.video)
if __name__ == '__main__':
    main()
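
# Example invocation (assuming the model under ./models/ and an ./output
# directory both exist; the script name here is illustrative):
#   python detect_emotion_video.py --video sample.mp4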