Delete fer.py
fer.py
DELETED
@@ -1,351 +0,0 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

# MIT License
#
# Copyright (c) 2018 Justin Shenk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# IMPORTANT:
#
# This code is derived from Iván de Paz Centeno's implementation of MTCNN
# (https://github.com/ipazc/mtcnn/) and Octavio Arriaga's facial expression recognition repo
# (https://github.com/oarriaga/face_classification).
#
import logging
import os
import pkg_resources
import requests
import sys
from typing import Sequence, Tuple, Union

import cv2
import numpy as np

from tensorflow.keras.models import load_model


from .utils import load_image

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("fer")

NumpyRects = Union[np.ndarray, Sequence[Tuple[int, int, int, int]]]

__author__ = "Justin Shenk"

PADDING = 40
SERVER_URL = "http://localhost:8501/v1/models/emotion_model:predict"


class FER(object):
    """
    Allows performing Facial Expression Recognition ->
        a) Detection of faces
        b) Detection of emotions
    """

    def __init__(
        self,
        cascade_file: str = None,
        mtcnn=False,
        tfserving: bool = False,
        scale_factor: float = 1.1,
        min_face_size: int = 50,
        min_neighbors: int = 5,
        offsets: tuple = (10, 10),
    ):
        """
        Initializes the face detector and Keras model for facial expression recognition.
        :param cascade_file: file URI with the Haar cascade for face classification
        :param mtcnn: use the MTCNN network for face detection (requires facenet-pytorch)
        :param tfserving: query a TensorFlow Serving endpoint instead of the bundled Keras model
        :param scale_factor: parameter specifying how much the image size is reduced at each image scale
        :param min_face_size: minimum size of the face to detect
        :param min_neighbors: minNeighbors parameter of the Haar cascade detector
        :param offsets: padding around face before classification
        """
        self.__scale_factor = scale_factor
        self.__min_neighbors = min_neighbors
        self.__min_face_size = min_face_size
        self.__offsets = offsets
        self.tfserving = tfserving

        if cascade_file is None:
            cascade_file = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"

        if mtcnn:
            try:
                from facenet_pytorch import MTCNN
            except ImportError:
                raise Exception(
                    "MTCNN not installed, install it with pip install facenet-pytorch and from facenet_pytorch import MTCNN"
                )
            self.__face_detector = "mtcnn"
            self._mtcnn = MTCNN(keep_all=True)
        else:
            self.__face_detector = cv2.CascadeClassifier(cascade_file)

        self._initialize_model()

    def _initialize_model(self):
        if self.tfserving:
            self.__emotion_target_size = (64, 64)  # hardcoded for now
        else:
            # Local Keras model
            emotion_model = pkg_resources.resource_filename(
                "fer", "data/emotion_model.hdf5"
            )
            log.debug("Emotion model: {}".format(emotion_model))
            self.__emotion_classifier = load_model(emotion_model, compile=False)
            self.__emotion_classifier.make_predict_function()
            self.__emotion_target_size = self.__emotion_classifier.input_shape[1:3]
        return

    def _classify_emotions(self, gray_faces: np.ndarray) -> np.ndarray:  # b x w x h
        """Run faces through online or offline classifier."""
        if self.tfserving:
            gray_faces = np.expand_dims(gray_faces, -1)  # to 4-dimensions
            instances = gray_faces.tolist()
            response = requests.post(SERVER_URL, json={"instances": instances})
            response.raise_for_status()

            emotion_predictions = response.json()["predictions"]
            return emotion_predictions
        else:
            return self.__emotion_classifier(gray_faces)
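    # For illustration only (not executed): with tfserving=True, the branch above
    # posts a TensorFlow Serving REST request to SERVER_URL, one preprocessed
    # 64x64x1 grayscale face per instance; the numeric values below are placeholders:
    #
    #   POST http://localhost:8501/v1/models/emotion_model:predict
    #   {"instances": [[[[-0.76], [0.13], ...], ...]]}
    #
    # and it reads the scores back from the "predictions" field, one score per
    # label in _get_labels() order, e.g.:
    #
    #   {"predictions": [[0.01, 0.00, 0.02, 0.90, 0.02, 0.01, 0.04]]}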

    @staticmethod
    def pad(image):
        """Pad image."""
        row, col = image.shape[:2]
        bottom = image[row - 2 : row, 0:col]
        mean = cv2.mean(bottom)[0]

        padded_image = cv2.copyMakeBorder(
            image,
            top=PADDING,
            bottom=PADDING,
            left=PADDING,
            right=PADDING,
            borderType=cv2.BORDER_CONSTANT,
            value=[mean, mean, mean],
        )
        return padded_image

    @staticmethod
    def depad(image):
        row, col = image.shape[:2]
        return image[PADDING : row - PADDING, PADDING : col - PADDING]

    @staticmethod
    def tosquare(bbox):
        """Convert bounding box to square by elongating shorter side."""
        x, y, w, h = bbox
        if h > w:
            diff = h - w
            x -= diff // 2
            w += diff
        elif w > h:
            diff = w - h
            y -= diff // 2
            h += diff
        if w != h:
            log.debug(f"{w} is not {h}")

        return (x, y, w, h)

    def find_faces(self, img: np.ndarray, bgr=True) -> list:
        """Image to list of face bounding boxes (x, y, w, h)."""
        if isinstance(self.__face_detector, cv2.CascadeClassifier):
            if bgr:
                gray_image_array = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            else:  # assume gray
                gray_image_array = img

            faces = self.__face_detector.detectMultiScale(
                gray_image_array,
                scaleFactor=self.__scale_factor,
                minNeighbors=self.__min_neighbors,
                flags=cv2.CASCADE_SCALE_IMAGE,
                minSize=(self.__min_face_size, self.__min_face_size),
            )
        elif self.__face_detector == "mtcnn":
            boxes, probs = self._mtcnn.detect(img)
            faces = []
            if type(boxes) == np.ndarray:
                for face in boxes:
                    faces.append(
                        [
                            int(face[0]),
                            int(face[1]),
                            int(face[2]) - int(face[0]),
                            int(face[3]) - int(face[1]),
                        ]
                    )

        return faces

    @staticmethod
    def __preprocess_input(x, v2=False):
        x = x.astype("float32")
        x = x / 255.0
        if v2:
            x = x - 0.5
            x = x * 2.0
        return x

    def __apply_offsets(self, face_coordinates):
        """Offset face coordinates with padding before classification.
        x1, x2, y1, y2 = 0, 100, 0, 100 becomes -10, 110, -10, 110
        """
        x, y, width, height = face_coordinates
        x_off, y_off = self.__offsets
        x1 = x - x_off
        x2 = x + width + x_off
        y1 = y - y_off
        y2 = y + height + y_off
        return x1, x2, y1, y2

    @staticmethod
    def _get_labels():
        return {
            0: "angry",
            1: "disgust",
            2: "fear",
            3: "happy",
            4: "sad",
            5: "surprise",
            6: "neutral",
        }

    def detect_emotions(
        self, img: np.ndarray, face_rectangles: NumpyRects = None
    ) -> list:
        """
        Detects bounding boxes from the specified image with ranking of emotions.
        :param img: exact image path, numpy array (BGR or gray) or base64 encoded image
            could be passed.
        :return: list containing all the bounding boxes detected with their emotions.
        """
        img = load_image(img)

        emotion_labels = self._get_labels()

        if face_rectangles is None:
            face_rectangles = self.find_faces(img, bgr=True)

        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_img = self.pad(gray_img)

        emotions = []
        gray_faces = []
        if face_rectangles is not None:
            for face_coordinates in face_rectangles:
                face_coordinates = self.tosquare(face_coordinates)

                # offset to expand bounding box
                # Note: x1 and y1 can be negative
                x1, x2, y1, y2 = self.__apply_offsets(face_coordinates)

                # account for padding in bounding box coordinates
                x1 += PADDING
                y1 += PADDING
                x2 += PADDING
                y2 += PADDING
                x1 = np.clip(x1, a_min=0, a_max=None)
                y1 = np.clip(y1, a_min=0, a_max=None)

                gray_face = gray_img[max(0, y1) : y2, max(0, x1) : x2]

                try:
                    gray_face = cv2.resize(gray_face, self.__emotion_target_size)
                except Exception as e:
                    log.warning("{} resize failed: {}".format(gray_face.shape, e))
                    continue

                # Local Keras model
                gray_face = self.__preprocess_input(gray_face, True)
                gray_faces.append(gray_face)

        # predict all faces
        if not len(gray_faces):
            return emotions  # no valid faces

        # classify emotions
        emotion_predictions = self._classify_emotions(np.array(gray_faces))

        # label scores
        for face_idx, face in enumerate(emotion_predictions):
            labelled_emotions = {
                emotion_labels[idx]: round(float(score), 2)
                for idx, score in enumerate(face)
            }

            emotions.append(
                dict(box=face_rectangles[face_idx], emotions=labelled_emotions)
            )

        self.emotions = emotions

        return emotions

    def top_emotion(
        self, img: np.ndarray
    ) -> Tuple[Union[str, None], Union[float, None]]:
        """Convenience wrapper for `detect_emotions` returning only the top emotion for the first face in frame.
        :param img: image to process
        :return: top emotion and score (for first face in frame) or (None, None)
        """
        emotions = self.detect_emotions(img=img)
        top_emotions = [
            max(e["emotions"], key=lambda key: e["emotions"][key]) for e in emotions
        ]

        # Take first face
        if len(top_emotions):
            top_emotion = top_emotions[0]
        else:
            return (None, None)
        score = emotions[0]["emotions"][top_emotion]

        return top_emotion, score


def parse_arguments(args):
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--image", type=str, help="Image filepath")
    return parser.parse_args()


def top_emotion():
    args = parse_arguments(sys.argv)
    fer = FER()
    top_emotion, score = fer.top_emotion(args.image)
    print(top_emotion, score)


def main():
    top_emotion()


if __name__ == "__main__":
    main()
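For reference, a minimal usage sketch of the class this deleted file defined. It assumes the fer package (with its bundled emotion_model.hdf5 weights and utils.load_image helper) is still installed, that the class is importable as `from fer import FER`, and that a local image such as test.jpg exists:

# Hypothetical usage sketch; file name and import path are assumptions.
import cv2
from fer import FER

detector = FER()                       # Haar-cascade detector + local Keras model
image = cv2.imread("test.jpg")         # BGR image, as find_faces expects
print(detector.detect_emotions(image))
# e.g. [{'box': [x, y, w, h],
#        'emotions': {'angry': 0.01, 'disgust': 0.0, 'fear': 0.02, 'happy': 0.9,
#                     'sad': 0.02, 'surprise': 0.01, 'neutral': 0.04}}]
print(detector.top_emotion(image))     # ('happy', 0.9) for the first face, or (None, None)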