MerlenMaven committed on
Commit c3c3648 · verified · 1 Parent(s): 0fb2076

Upload 5 files

Files changed (5)
  1. __init__.py +45 -0
  2. classes.py +381 -0
  3. emotionsmultilanguage.py +20 -0
  4. fer.py +351 -0
  5. utils.py +116 -0
__init__.py ADDED
@@ -0,0 +1,45 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+
+ # MIT License
+ #
+ # Copyright (c) 2018 Justin Shenk
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ # SOFTWARE.
+ import logging
+
+ from .classes import Video
+ from .fer import FER
+
+ log = logging.getLogger("fer")
+ log.setLevel(logging.INFO)
+
+ __version__ = "22.5.1"
+
+ __title__ = "fer"
+ __description__ = "Facial expression recognition from images"
+ __url__ = "https://github.com/justinshenk/fer"
+ __uri__ = __url__
+ __doc__ = __description__ + " <" + __url__ + ">"
+
+ __author__ = "Justin Shenk"
+ __email__ = "[email protected]"
+
+ __license__ = "MIT"
+ __copyright__ = "Copyright (c) 2019 " + __author__
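
For context, a minimal usage sketch of the two classes this package exports (a sketch only: `video.mp4` is an illustrative filename, and the package is assumed to be installed as `fer`):

    from fer import FER, Video

    detector = FER()                      # default Haar-cascade face detector
    video = Video("video.mp4")            # illustrative path; the file must exist
    df = video.analyze(detector, output="pandas", save_video=False, zip_images=False)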
classes.py ADDED
@@ -0,0 +1,381 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ import csv
+ import logging
+ import os
+ import re
+ from moviepy import VideoFileClip, AudioFileClip, CompositeAudioClip
+ from pathlib import Path
+ from typing import Optional, Union
+ from zipfile import ZipFile
+
+ import cv2
+ import pandas as pd
+
+ from tqdm import tqdm
+ from tqdm.contrib.logging import logging_redirect_tqdm
+
+ from .utils import draw_annotations
+
+ log = logging.getLogger("fer")
+
+
+ class Video(object):
+     def __init__(
+         self,
+         video_file: str,
+         outdir: str = "output",
+         first_face_only: bool = True,
+         tempfile: Optional[str] = None,
+     ):
+         """Video class for extracting and saving frames for emotion detection.
+         :param video_file - str
+         :param outdir - str
+         :param tempdir - str
+         :param first_face_only - bool
+         :param tempfile - str
+         """
+         assert os.path.exists(video_file), "Video file not found at {}".format(
+             os.path.abspath(video_file)
+         )
+         self.cap = cv2.VideoCapture(video_file)
+         if not os.path.isdir(outdir):
+             os.makedirs(outdir, exist_ok=True)
+         self.outdir = outdir
+
+         if not first_face_only:
+             log.error("Only single-face charting is implemented")
+         self.first_face_only = first_face_only
+         self.tempfile = tempfile
+         self.filepath = video_file
+         self.filename = "".join(self.filepath.split("/")[-1])
+
+     @staticmethod
+     def get_max_faces(data: list) -> int:
+         """Get max number of faces detected in a series of frames, eg 3"""
+         max = 0
+         for frame in data:
+             for face in frame:
+                 if len(face) > max:
+                     max = len(face)
+         return max
+
+     @staticmethod
+     def _to_dict(data: Union[dict, list]) -> dict:
+         emotions = []
+
+         frame = data[0]
+         if isinstance(frame, list):
+             try:
+                 emotions = frame[0]["emotions"].keys()
+             except IndexError:
+                 raise Exception("No data in 'data'")
+         elif isinstance(frame, dict):
+             return data
+
+         dictlist = []
+
+         for data_idx, frame in enumerate(data):
+             rowdict = {}
+             for idx, face in enumerate(list(frame)):
+                 if not isinstance(face, dict):
+                     break
+                 rowdict.update({"box" + str(idx): face["box"]})
+                 rowdict.update(
+                     {emo + str(idx): face["emotions"][emo] for emo in emotions}
+                 )
+             dictlist.append(rowdict)
+         return dictlist
+
+     def to_pandas(self, data: Union[pd.DataFrame, list]) -> pd.DataFrame:
+         """Convert results to pandas DataFrame"""
+         if isinstance(data, pd.DataFrame):
+             return data
+
+         if not len(data):
+             return pd.DataFrame()
+         datalist = self._to_dict(data)
+         df = pd.DataFrame(datalist)
+         if self.first_face_only:
+             df = self.get_first_face(df)
+         return df
+
+     @staticmethod
+     def get_first_face(df: pd.DataFrame) -> pd.DataFrame:
+         assert isinstance(df, pd.DataFrame), "Must be a pandas DataFrame"
+         try:
+             int(df.columns[0][-1])
+         except ValueError:
+             # Already only one face in df
+             return df
+
+         columns = [x for x in df.columns if x[-1] == "0"]
+         new_columns = [x[:-1] for x in columns]
+         single_df = df[columns]
+         single_df.columns = new_columns
+         return single_df
+
+     @staticmethod
+     def get_emotions(df: pd.DataFrame) -> list:
+         """Get emotion columns from results."""
+         columns = [x for x in df.columns if "box" not in x]
+         return df[columns]
+
+     def to_csv(self, data, filename="data.csv"):
+         """Save data to csv"""
+
+         def key(item):
+             key_pat = re.compile(r"^(\D+)(\d+)$")
+             m = key_pat.match(item)
+             return m.group(1), int(m.group(2))
+
+         dictlist = self._to_dict(data)
+         columns = set().union(*(d.keys() for d in dictlist))
+         columns = sorted(columns, key=key)  # sort by trailing number (faces)
+
+         with open(filename, "w", newline="") as csvfile:
+             writer = csv.DictWriter(csvfile, columns, lineterminator="\n")
+             writer.writeheader()
+             writer.writerows(dictlist)
+         return dictlist
+
+     def _close_video(self, outfile, save_frames, zip_images):
+         self.cap.release()
+         if self.display or self.save_video:
+             self.videowriter.release()
+
+         if self.save_video:
+             log.info("Completed analysis: saved to {}".format(self.tempfile or outfile))
+             if self.tempfile:
+                 os.replace(self.tempfile, outfile)
+
+         if save_frames and zip_images:
+             log.info("Starting to Zip")
+             outdir = Path(self.outdir)
+             zip_dir = outdir / "images.zip"
+             images = sorted(list(outdir.glob("*.jpg")))
+             total = len(images)
+             i = 0
+             with ZipFile(zip_dir, "w") as zip:
+                 for file in images:
+                     zip.write(file, arcname=file.name)
+                     os.remove(file)
+                     i += 1
+                     if i % 50 == 0:
+                         log.info(f"Compressing: {i*100 // total}%")
+             log.info("Zip has finished")
+
+     def _offset_detection_box(self, faces, detection_box):
+         for face in faces:
+             original_box = face.get("box")
+             face["box"] = (
+                 original_box[0] + detection_box.get("x_min"),
+                 original_box[1] + detection_box.get("y_min"),
+                 original_box[2],
+                 original_box[3],
+             )
+         return faces
+
+     def _increment_frames(
+         self, frame, faces, video_id, root, lang="en", size_multiplier=1
+     ):
+         # Save images to `self.outdir`
+         imgpath = os.path.join(
+             self.outdir, (video_id or root) + str(self.frameCount) + ".jpg"
+         )
+
+         if self.annotate_frames:
+             frame = draw_annotations(
+                 frame,
+                 faces,
+                 boxes=True,
+                 scores=True,
+                 lang=lang,
+                 size_multiplier=size_multiplier,
+             )
+
+         if self.save_frames:
+             cv2.imwrite(imgpath, frame)
+
+         if self.display:
+             cv2.imshow("Video", frame)
+
+         if self.save_video:
+             self.videowriter.write(frame)
+
+         self.frameCount += 1
+
+     def analyze(
+         self,
+         detector,  # fer.FER instance
+         display: bool = False,
+         output: str = "csv",
+         frequency: Optional[int] = None,
+         max_results: int = None,
+         save_fps: Optional[int] = None,
+         video_id: Optional[str] = None,
+         save_frames: bool = True,
+         save_video: bool = True,
+         annotate_frames: bool = True,
+         zip_images: bool = True,
+         detection_box: Optional[dict] = None,
+         lang: str = "en",
+         include_audio: bool = False,
+         size_multiplier: int = 1,
+     ) -> list:
+         """Recognize facial expressions in video using `detector`.
+
+         Args:
+
+             detector (fer.FER): facial expression recognizer
+             display (bool): show images with cv2.imshow
+             output (str): csv or pandas
+             frequency (int): inference on every nth frame (higher number is faster)
+             max_results (int): number of frames to run inference before stopping
+             save_fps (int): inference frequency = video fps // save_fps
+             video_id (str): filename for saving
+             save_frames (bool): saves frames to directory
+             save_video (bool): saves output video
+             annotate_frames (bool): add emotion labels
+             zip_images (bool): compress output
+             detection_box (dict): dict with bounding box for subimage (x_min, x_max, y_min, y_max)
+             lang (str): language of the emotion labels shown on the video
+             include_audio (bool): also write a copy of the output video with the source audio track
+             size_multiplier (int): scale factor for the emotion labels drawn on the video
+         Returns:
+
+             data (list): list of results
+
+         """
+         frames_emotions = []
+         if frequency is None:
+             frequency = 1
+         else:
+             frequency = int(frequency)
+
+         self.display = display
+         self.save_frames = save_frames
+         self.save_video = save_video
+         self.annotate_frames = annotate_frames
+
+         results_nr = 0
+
+         # Open video
+         assert self.cap.open(self.filepath), "Video capture not opening"
+         self.__emotions = detector._get_labels().items()
+         self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
+         pos_frames = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
+         assert int(pos_frames) == 0, "Video not at index 0"
+
+         self.frameCount = 0
+         height, width = (
+             int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
+             int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+         )
+
+         fps = self.cap.get(cv2.CAP_PROP_FPS)
+         length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+         assert fps and length, "File {} not loaded".format(self.filepath)
+
+         if save_fps is not None:
+             frequency = fps // save_fps
+             log.info("Saving every {} frames".format(frequency))
+
+         log.info(
+             "{:.2f} fps, {} frames, {:.2f} seconds".format(fps, length, length / fps)
+         )
+
+         if self.save_frames:
+             os.makedirs(self.outdir, exist_ok=True)
+             log.info(f"Making directories at {self.outdir}")
+         root, ext = os.path.splitext(os.path.basename(self.filepath))
+         outfile = os.path.join(self.outdir, f"{root}_output{ext}")
+
+         if save_video:
+             self.videowriter = self._save_video(outfile, fps, width, height)
+
+         with logging_redirect_tqdm():
+             pbar = tqdm(total=length, unit="frames")
+
+             while self.cap.isOpened():
+                 ret, frame = self.cap.read()
+                 if not ret:  # end of video
+                     break
+
+                 if frame is None:
+                     log.warning("Empty frame")
+                     continue
+
+                 if self.frameCount % frequency != 0:
+                     self.frameCount += 1
+                     continue
+
+                 if detection_box is not None:
+                     frame = self._crop(frame, detection_box)
+
+                 # Get faces and detect emotions; coordinates are for unpadded frame
+                 try:
+                     faces = detector.detect_emotions(frame)
+                 except Exception as e:
+                     log.error(e)
+                     break
+
+                 # Offset detection_box to include padding
+                 if detection_box is not None:
+                     faces = self._offset_detection_box(faces, detection_box)
+
+                 self._increment_frames(frame, faces, video_id, root, lang, size_multiplier)
+
+                 if cv2.waitKey(1) & 0xFF == ord("q"):
+                     break
+
+                 if faces:
+                     frames_emotions.append(faces)
+
+                 results_nr += 1
+                 if max_results and results_nr > max_results:
+                     break
+
+                 pbar.update(1)
+
+             pbar.close()
+             self._close_video(outfile, save_frames, zip_images)
+
+         if include_audio:
+             audio_suffix = "_audio."
+             my_audio = AudioFileClip(self.filepath)
+             new_audioclip = CompositeAudioClip([my_audio])
+
+             my_output_clip = VideoFileClip(outfile)
+             my_output_clip.audio = new_audioclip
+             my_output_clip.write_videofile(audio_suffix.join(outfile.rsplit(".", 1)))
+
+         return self.to_format(frames_emotions, output)
+
+     def to_format(self, data, format):
+         """Return data in format."""
+         methods_lookup = {"csv": self.to_csv, "pandas": self.to_pandas}
+         return methods_lookup[format](data)
+
+     def _save_video(self, outfile: str, fps: int, width: int, height: int):
+         if os.path.isfile(outfile):
+             os.remove(outfile)
+             log.info("Deleted pre-existing {}".format(outfile))
+         if self.tempfile and os.path.isfile(self.tempfile):
+             os.remove(self.tempfile)
+         fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
+         videowriter = cv2.VideoWriter(
+             self.tempfile or outfile, fourcc, fps, (width, height), True
+         )
+         return videowriter
+
+     @staticmethod
+     def _crop(frame, detection_box):
+         crop_frame = frame[
+             detection_box.get("y_min") : detection_box.get("y_max"),
+             detection_box.get("x_min") : detection_box.get("x_max"),
+         ]
+         return crop_frame
+
+     def __del__(self):
+         cv2.destroyAllWindows()
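
For reference, a hedged sketch of the data structure `analyze` accumulates (one list of face dicts per processed frame, as produced by `FER.detect_emotions`) and how the conversion helpers above consume it; the numbers and file name are illustrative:

    # One entry per analyzed frame; each entry lists the faces found in that frame.
    frames_emotions = [
        [  # frame 0: one detected face
            {
                "box": (24, 40, 120, 120),
                "emotions": {"angry": 0.01, "disgust": 0.0, "fear": 0.02, "happy": 0.9,
                             "sad": 0.02, "surprise": 0.03, "neutral": 0.02},
            }
        ],
    ]

    video = Video("input.mp4")                 # illustrative path; must exist
    df = video.to_pandas(frames_emotions)      # one row per frame, first face only by default
    emotion_columns = Video.get_emotions(df)   # drops the box column(s)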
emotionsmultilanguage.py ADDED
@@ -0,0 +1,20 @@
+ """
+ When you add a new language translation, you need to add a translation for each key emotion (angry, disgust, fear, happy, sad, surprise, neutral)
+ under the corresponding language key. Please be careful to use only English characters, e.g. "Wutend" is originally "Wütend", but since 'ü' is not
+ in the English alphabet we change it to 'u'.
+
+ Languages added:
+ "en": English -- the default language; it does not need to be added again, since the program reads the English values from the keys of this dictionary
+ "tr": Turkish (Türkçe)
+ "de": German (Deutsch)
+ """
+
+ emotions_dict = {
+     "angry": {"tr": "Kizgin", "de": "Wutend"},
+     "disgust": {"tr": "Igrenme", "de": "der Ekel"},
+     "fear": {"tr": "Korku", "de": "Furcht"},
+     "happy": {"tr": "Mutluluk", "de": "Glucklich"},
+     "sad": {"tr": "Uzuntu", "de": "Traurig"},
+     "surprise": {"tr": "Saskinlik", "de": "Uberraschung"},
+     "neutral": {"tr": "Notr", "de": "Neutral"},
+ }
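
As a hedged illustration of the convention the docstring above describes, a new language would be wired in roughly like this (the French strings are illustrative placeholders):

    # Hypothetical: adding French ("fr") means extending every emotion entry, ASCII-only.
    emotions_dict["angry"]["fr"] = "Fache"
    emotions_dict["happy"]["fr"] = "Heureux"
    # ...and so on for the remaining emotions, after which lang="fr" can be
    # passed to draw_annotations() / Video.analyze().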
fer.py ADDED
@@ -0,0 +1,351 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+
+ # MIT License
+ #
+ # Copyright (c) 2018 Justin Shenk
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ # SOFTWARE.
+
+ # IMPORTANT:
+ #
+ # This code is derived from Iván de Paz Centeno's implementation of MTCNN
+ # (https://github.com/ipazc/mtcnn/) and Octavio Arriaga's facial expression recognition repo
+ # (https://github.com/oarriaga/face_classification).
+ #
+ import logging
+ import os
+ import pkg_resources
+ import requests
+ import sys
+ from typing import Sequence, Tuple, Union
+
+ import cv2
+ import numpy as np
+
+ from tensorflow.keras.models import load_model
+
+
+ from .utils import load_image
+
+ logging.basicConfig(level=logging.INFO)
+ log = logging.getLogger("fer")
+
+ NumpyRects = Union[np.ndarray, Sequence[Tuple[int, int, int, int]]]
+
+ __author__ = "Justin Shenk"
+
+ PADDING = 40
+ SERVER_URL = "http://localhost:8501/v1/models/emotion_model:predict"
+
+
+ class FER(object):
+     """
+     Allows performing Facial Expression Recognition ->
+         a) Detection of faces
+         b) Detection of emotions
+     """
+
+     def __init__(
+         self,
+         cascade_file: str = None,
+         mtcnn=False,
+         tfserving: bool = False,
+         scale_factor: float = 1.1,
+         min_face_size: int = 50,
+         min_neighbors: int = 5,
+         offsets: tuple = (10, 10),
+     ):
+         """
+         Initializes the face detector and Keras model for facial expression recognition.
+         :param cascade_file: file URI with the Haar cascade for face classification
+         :param mtcnn: use the MTCNN network (facenet-pytorch) for face detection instead of the Haar cascade
+         :param scale_factor: parameter specifying how much the image size is reduced at each image scale
+         :param min_face_size: minimum size of the face to detect
+         :param offsets: padding around face before classification
+         """
+         self.__scale_factor = scale_factor
+         self.__min_neighbors = min_neighbors
+         self.__min_face_size = min_face_size
+         self.__offsets = offsets
+         self.tfserving = tfserving
+
+         if cascade_file is None:
+             cascade_file = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
+
+         if mtcnn:
+             try:
+                 from facenet_pytorch import MTCNN
+             except ImportError:
+                 raise Exception(
+                     "MTCNN not installed; install it with `pip install facenet-pytorch`"
+                 )
+             self.__face_detector = "mtcnn"
+             self._mtcnn = MTCNN(keep_all=True)
+         else:
+             self.__face_detector = cv2.CascadeClassifier(cascade_file)
+
+         self._initialize_model()
+
+     def _initialize_model(self):
+         if self.tfserving:
+             self.__emotion_target_size = (64, 64)  # hardcoded for now
+         else:
+             # Local Keras model
+             emotion_model = pkg_resources.resource_filename(
+                 "fer", "data/emotion_model.hdf5"
+             )
+             log.debug("Emotion model: {}".format(emotion_model))
+             self.__emotion_classifier = load_model(emotion_model, compile=False)
+             self.__emotion_classifier.make_predict_function()
+             self.__emotion_target_size = self.__emotion_classifier.input_shape[1:3]
+         return
+
+     def _classify_emotions(self, gray_faces: np.ndarray) -> np.ndarray:  # b x w x h
+         """Run faces through online or offline classifier."""
+         if self.tfserving:
+             gray_faces = np.expand_dims(gray_faces, -1)  # to 4-dimensions
+             instances = gray_faces.tolist()
+             response = requests.post(SERVER_URL, json={"instances": instances})
+             response.raise_for_status()
+
+             emotion_predictions = response.json()["predictions"]
+             return emotion_predictions
+         else:
+             return self.__emotion_classifier(gray_faces)
+
+     @staticmethod
+     def pad(image):
+         """Pad image."""
+         row, col = image.shape[:2]
+         bottom = image[row - 2 : row, 0:col]
+         mean = cv2.mean(bottom)[0]
+
+         padded_image = cv2.copyMakeBorder(
+             image,
+             top=PADDING,
+             bottom=PADDING,
+             left=PADDING,
+             right=PADDING,
+             borderType=cv2.BORDER_CONSTANT,
+             value=[mean, mean, mean],
+         )
+         return padded_image
+
+     @staticmethod
+     def depad(image):
+         row, col = image.shape[:2]
+         return image[PADDING : row - PADDING, PADDING : col - PADDING]
+
+     @staticmethod
+     def tosquare(bbox):
+         """Convert bounding box to square by elongating shorter side."""
+         x, y, w, h = bbox
+         if h > w:
+             diff = h - w
+             x -= diff // 2
+             w += diff
+         elif w > h:
+             diff = w - h
+             y -= diff // 2
+             h += diff
+         if w != h:
+             log.debug(f"{w} is not {h}")
+
+         return (x, y, w, h)
+
+     def find_faces(self, img: np.ndarray, bgr=True) -> list:
+         """Image to list of face bounding boxes (x, y, w, h)."""
+         if isinstance(self.__face_detector, cv2.CascadeClassifier):
+             if bgr:
+                 gray_image_array = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+             else:  # assume gray
+                 gray_image_array = img
+
+             faces = self.__face_detector.detectMultiScale(
+                 gray_image_array,
+                 scaleFactor=self.__scale_factor,
+                 minNeighbors=self.__min_neighbors,
+                 flags=cv2.CASCADE_SCALE_IMAGE,
+                 minSize=(self.__min_face_size, self.__min_face_size),
+             )
+         elif self.__face_detector == "mtcnn":
+             boxes, probs = self._mtcnn.detect(img)
+             faces = []
+             if type(boxes) == np.ndarray:
+                 for face in boxes:
+                     faces.append(
+                         [
+                             int(face[0]),
+                             int(face[1]),
+                             int(face[2]) - int(face[0]),
+                             int(face[3]) - int(face[1]),
+                         ]
+                     )
+
+         return faces
+
+     @staticmethod
+     def __preprocess_input(x, v2=False):
+         x = x.astype("float32")
+         x = x / 255.0
+         if v2:
+             x = x - 0.5
+             x = x * 2.0
+         return x
+
+     def __apply_offsets(self, face_coordinates):
+         """Offset face coordinates with padding before classification.
+         x1, x2, y1, y2 = 0, 100, 0, 100 becomes -10, 110, -10, 110
+         """
+         x, y, width, height = face_coordinates
+         x_off, y_off = self.__offsets
+         x1 = x - x_off
+         x2 = x + width + x_off
+         y1 = y - y_off
+         y2 = y + height + y_off
+         return x1, x2, y1, y2
+
+     @staticmethod
+     def _get_labels():
+         return {
+             0: "angry",
+             1: "disgust",
+             2: "fear",
+             3: "happy",
+             4: "sad",
+             5: "surprise",
+             6: "neutral",
+         }
+
+     def detect_emotions(
+         self, img: np.ndarray, face_rectangles: NumpyRects = None
+     ) -> list:
+         """
+         Detects bounding boxes from the specified image with ranking of emotions.
+         :param img: exact image path, numpy array (BGR or gray) or base64-encoded image
+             can be passed.
+         :return: list containing all the bounding boxes detected with their emotions.
+         """
+         img = load_image(img)
+
+         emotion_labels = self._get_labels()
+
+         if face_rectangles is None:
+             face_rectangles = self.find_faces(img, bgr=True)
+
+         gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+         gray_img = self.pad(gray_img)
+
+         emotions = []
+         gray_faces = []
+         if face_rectangles is not None:
+             for face_coordinates in face_rectangles:
+                 face_coordinates = self.tosquare(face_coordinates)
+
+                 # offset to expand bounding box
+                 # Note: x1 and y1 can be negative
+                 x1, x2, y1, y2 = self.__apply_offsets(face_coordinates)
+
+                 # account for padding in bounding box coordinates
+                 x1 += PADDING
+                 y1 += PADDING
+                 x2 += PADDING
+                 y2 += PADDING
+                 x1 = np.clip(x1, a_min=0, a_max=None)
+                 y1 = np.clip(y1, a_min=0, a_max=None)
+
+                 gray_face = gray_img[max(0, y1) : y2, max(0, x1) : x2]
+
+                 try:
+                     gray_face = cv2.resize(gray_face, self.__emotion_target_size)
+                 except Exception as e:
+                     log.warning("{} resize failed: {}".format(gray_face.shape, e))
+                     continue
+
+                 # Local Keras model
+                 gray_face = self.__preprocess_input(gray_face, True)
+                 gray_faces.append(gray_face)
+
+         # predict all faces
+         if not len(gray_faces):
+             return emotions  # no valid faces
+
+         # classify emotions
+         emotion_predictions = self._classify_emotions(np.array(gray_faces))
+
+         # label scores
+         for face_idx, face in enumerate(emotion_predictions):
+             labelled_emotions = {
+                 emotion_labels[idx]: round(float(score), 2)
+                 for idx, score in enumerate(face)
+             }
+
+             emotions.append(
+                 dict(box=face_rectangles[face_idx], emotions=labelled_emotions)
+             )
+
+         self.emotions = emotions
+
+         return emotions
+
+     def top_emotion(
+         self, img: np.ndarray
+     ) -> Tuple[Union[str, None], Union[float, None]]:
+         """Convenience wrapper for `detect_emotions` returning only top emotion for first face in frame.
+         :param img: image to process
+         :return: top emotion and score (for first face in frame) or (None, None)
+
+         """
+         emotions = self.detect_emotions(img=img)
+         top_emotions = [
+             max(e["emotions"], key=lambda key: e["emotions"][key]) for e in emotions
+         ]
+
+         # Take first face
+         if len(top_emotions):
+             top_emotion = top_emotions[0]
+         else:
+             return (None, None)
+         score = emotions[0]["emotions"][top_emotion]
+
+         return top_emotion, score
+
+
+ def parse_arguments(args):
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--image", type=str, help="Image filepath")
+     return parser.parse_args()
+
+
+ def top_emotion():
+     args = parse_arguments(sys.argv)
+     fer = FER()
+     top_emotion, score = fer.top_emotion(args.image)
+     print(top_emotion, score)
+
+
+ def main():
+     top_emotion()
+
+
+ if __name__ == "__main__":
+     main()
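
A minimal sketch of the public API defined above, assuming the packaged model weights are available and `face.jpg` (an illustrative path) contains a face:

    import cv2
    from fer import FER

    detector = FER()                         # Haar-cascade face detector + bundled Keras model
    img = cv2.imread("face.jpg")             # illustrative path
    results = detector.detect_emotions(img)
    # e.g. [{'box': [50, 30, 120, 120],
    #        'emotions': {'angry': 0.02, ..., 'happy': 0.83, ..., 'neutral': 0.05}}]
    emotion, score = detector.top_emotion(img)   # top emotion of the first face, or (None, None)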
utils.py ADDED
@@ -0,0 +1,116 @@
+ import base64
+ import os
+ import requests
+
+ import cv2
+ import numpy as np
+ from PIL import Image
+
+ from .exceptions import InvalidImage
+ from .emotionsmultilanguage import emotions_dict
+
+
+ def draw_annotations(
+     frame: np.ndarray,
+     faces: list,
+     boxes=True,
+     scores=True,
+     color: tuple = (0, 155, 255),
+     lang: str = "en",
+     size_multiplier: int = 1,
+ ) -> np.ndarray:
+     """Draws boxes around detected faces. Faces is a list of dicts with `box` and `emotions`."""
+     if not len(faces):
+         return frame
+
+     for face in faces:
+         x, y, w, h = face["box"]
+         emotions = face["emotions"]
+
+         if boxes:
+             cv2.rectangle(
+                 frame,
+                 (x, y, w, h),
+                 color,
+                 2,
+             )
+
+         if scores:
+             frame = draw_scores(frame, emotions, (x, y, w, h), lang, size_multiplier)
+     return frame
+
+
+ def loadBase64Img(uri):
+     encoded_data = uri.split(",")[1]
+     nparr = np.frombuffer(base64.b64decode(encoded_data), np.uint8)
+     img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+     return img
+
+
+ def pil_to_bgr(pil_image):
+     return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+
+
+ def load_image(img):
+     """Modified from github.com/serengil/deepface. Returns bgr (opencv-style) numpy array."""
+     is_exact_image = is_base64_img = is_url_img = False
+
+     if type(img).__module__ == np.__name__:
+         is_exact_image = True
+     elif img is None:
+         raise InvalidImage("Image not valid.")
+     elif len(img) > 11 and img[0:11] == "data:image/":
+         is_base64_img = True
+     elif len(img) > 11 and img.startswith("http"):
+         is_url_img = True
+
+     if is_base64_img:
+         img = loadBase64Img(img)
+     elif is_url_img:
+         img = pil_to_bgr(Image.open(requests.get(img, stream=True).raw))
+     elif not is_exact_image:  # image path passed as input
+         if not os.path.isfile(img):
+             raise ValueError(f"Confirm that {img} exists")
+         img = cv2.imread(img)
+
+     if img is None or not hasattr(img, "shape"):
+         raise InvalidImage("Image not valid.")
+
+     return img
+
+
+ def draw_scores(
+     frame: np.ndarray,
+     emotions: dict,
+     bounding_box: dict,
+     lang: str = "en",
+     size_multiplier: int = 1,
+ ) -> np.ndarray:
+     """Draw scores for each emotion under faces."""
+     GRAY = (211, 211, 211)
+     GREEN = (0, 255, 0)
+     x, y, w, h = bounding_box
+
+     for idx, (emotion, score) in enumerate(emotions.items()):
+         color = GRAY if score < 0.01 else GREEN
+
+         if lang != "en":
+             emotion = emotions_dict[emotion][lang]
+
+         emotion_score = "{}: {}".format(
+             emotion, "{:.2f}".format(score) if score >= 0.01 else ""
+         )
+         cv2.putText(
+             frame,
+             emotion_score,
+             (
+                 x,
+                 y + h + (15 * size_multiplier) + idx * (15 * size_multiplier),
+             ),
+             cv2.FONT_HERSHEY_SIMPLEX,
+             0.5 * size_multiplier,
+             color,
+             1 * size_multiplier,
+             cv2.LINE_AA,
+         )
+     return frame
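
A hedged sketch of how these helpers fit together (the URL, box, and scores are illustrative; the `fer.utils` import path assumes this module is packaged under `fer`):

    from fer.utils import load_image, draw_annotations

    # load_image accepts a BGR/gray numpy array, a local file path, an http(s) URL,
    # or a "data:image/..." base64 URI, and always returns a BGR numpy array.
    frame = load_image("https://example.com/face.jpg")    # illustrative URL

    faces = [{"box": (10, 10, 80, 80),
              "emotions": {"angry": 0.0, "disgust": 0.0, "fear": 0.0, "happy": 0.95,
                           "sad": 0.0, "surprise": 0.03, "neutral": 0.02}}]
    annotated = draw_annotations(frame, faces, lang="de", size_multiplier=2)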