Delete classes.py
classes.py +0 -381
classes.py
DELETED
@@ -1,381 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
import logging
import os
import re
from moviepy import VideoFileClip, AudioFileClip, CompositeAudioClip
from pathlib import Path
from typing import Optional, Union
from zipfile import ZipFile

import cv2
import pandas as pd

from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

from .utils import draw_annotations

log = logging.getLogger("fer")


class Video(object):
    def __init__(
        self,
        video_file: str,
        outdir: str = "output",
        first_face_only: bool = True,
        tempfile: Optional[str] = None,
    ):
        """Video class for extracting and saving frames for emotion detection.

        :param video_file - str path to the source video
        :param outdir - str directory for output artifacts
        :param first_face_only - bool chart only the first detected face
        :param tempfile - str optional temporary file used while writing video
        """
        assert os.path.exists(video_file), "Video file not found at {}".format(
            os.path.abspath(video_file)
        )
        self.cap = cv2.VideoCapture(video_file)
        if not os.path.isdir(outdir):
            os.makedirs(outdir, exist_ok=True)
        self.outdir = outdir

        if not first_face_only:
            log.error("Only single-face charting is implemented")
        self.first_face_only = first_face_only
        self.tempfile = tempfile
        self.filepath = video_file
        self.filename = os.path.basename(self.filepath)
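    # Illustrative only (hypothetical path): constructing a Video validates the
    # file, opens a cv2.VideoCapture, and creates the output directory, e.g.
    #   video = Video("my_video.mp4", outdir="output")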
    @staticmethod
    def get_max_faces(data: list) -> int:
        """Get the maximum number of faces detected in a series of frames, e.g. 3."""
        max_faces = 0  # renamed from `max` to avoid shadowing the builtin
        for frame in data:
            for face in frame:
                if len(face) > max_faces:
                    max_faces = len(face)
        return max_faces
    @staticmethod
    def _to_dict(data: Union[dict, list]) -> dict:
        emotions = []

        frame = data[0]
        if isinstance(frame, list):
            try:
                emotions = frame[0]["emotions"].keys()
            except IndexError:
                raise Exception("No data in 'data'")
        elif isinstance(frame, dict):
            return data

        dictlist = []

        for data_idx, frame in enumerate(data):
            rowdict = {}
            for idx, face in enumerate(list(frame)):
                if not isinstance(face, dict):
                    break
                rowdict.update({"box" + str(idx): face["box"]})
                rowdict.update(
                    {emo + str(idx): face["emotions"][emo] for emo in emotions}
                )
            dictlist.append(rowdict)
        return dictlist
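    # A minimal sketch (assumed shape, inferred from how `_to_dict` indexes it)
    # of the per-frame results it consumes: a list of frames, each a list of
    # face dicts carrying a bounding box and per-emotion scores. Values here
    # are illustrative only.
    #
    #   data = [
    #       [  # frame 0, one face
    #           {"box": (25, 40, 110, 110),
    #            "emotions": {"angry": 0.01, "happy": 0.92, "neutral": 0.07}},
    #       ],
    #   ]
    #
    # `_to_dict` would flatten frame 0 into a single row:
    #   {"box0": (25, 40, 110, 110), "angry0": 0.01, "happy0": 0.92, "neutral0": 0.07}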
    def to_pandas(self, data: Union[pd.DataFrame, list]) -> pd.DataFrame:
        """Convert results to a pandas DataFrame."""
        if isinstance(data, pd.DataFrame):
            return data

        if not len(data):
            return pd.DataFrame()
        datalist = self._to_dict(data)
        df = pd.DataFrame(datalist)
        if self.first_face_only:
            df = self.get_first_face(df)
        return df
    @staticmethod
    def get_first_face(df: pd.DataFrame) -> pd.DataFrame:
        assert isinstance(df, pd.DataFrame), "Must be a pandas DataFrame"
        try:
            int(df.columns[0][-1])
        except ValueError:
            # Already only one face in df
            return df

        columns = [x for x in df.columns if x[-1] == "0"]
        new_columns = [x[:-1] for x in columns]
        single_df = df[columns].copy()  # copy to avoid mutating a view of df
        single_df.columns = new_columns
        return single_df
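    # For illustration: with two detected faces the columns look like
    # ["box0", "angry0", ..., "box1", "angry1", ...]; `get_first_face` keeps
    # only the "...0" columns and strips the suffix, yielding ["box", "angry", ...].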
    @staticmethod
    def get_emotions(df: pd.DataFrame) -> pd.DataFrame:
        """Get emotion columns from results."""
        columns = [x for x in df.columns if "box" not in x]
        return df[columns]
    def to_csv(self, data, filename="data.csv"):
        """Save data to csv"""

        def key(item):
            key_pat = re.compile(r"^(\D+)(\d+)$")
            m = key_pat.match(item)
            return m.group(1), int(m.group(2))

        dictlist = self._to_dict(data)
        columns = set().union(*(d.keys() for d in dictlist))
        columns = sorted(columns, key=key)  # sort by trailing number (faces)

        # Write to the `filename` argument rather than a hardcoded "data.csv"
        with open(filename, "w", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, columns, lineterminator="\n")
            writer.writeheader()
            writer.writerows(dictlist)
        return dictlist
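    # Sketch of the sort key above (illustrative values): each column name is
    # split into its text prefix and trailing face index, so indices sort
    # numerically rather than lexicographically:
    #   key("happy10") -> ("happy", 10)
    #   "happy2" therefore sorts before "happy10" (plain string order would not).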
    def _close_video(self, outfile, save_frames, zip_images):
        self.cap.release()
        if self.display or self.save_video:
            self.videowriter.release()

        if self.save_video:
            log.info("Completed analysis: saved to {}".format(self.tempfile or outfile))
            if self.tempfile:
                os.replace(self.tempfile, outfile)

        if save_frames and zip_images:
            log.info("Starting to zip")
            outdir = Path(self.outdir)
            zip_dir = outdir / "images.zip"
            images = sorted(list(outdir.glob("*.jpg")))
            total = len(images)
            i = 0
            with ZipFile(zip_dir, "w") as zf:  # avoid shadowing the builtin `zip`
                for file in images:
                    zf.write(file, arcname=file.name)
                    os.remove(file)
                    i += 1
                    if i % 50 == 0:
                        log.info(f"Compressing: {i * 100 // total}%")
            log.info("Zipping finished")
    def _offset_detection_box(self, faces, detection_box):
        for face in faces:
            original_box = face.get("box")
            face["box"] = (
                original_box[0] + detection_box.get("x_min"),
                original_box[1] + detection_box.get("y_min"),
                original_box[2],
                original_box[3],
            )
        return faces
    def _increment_frames(
        self, frame, faces, video_id, root, lang="en", size_multiplier=1
    ):
        # Save images to `self.outdir`
        imgpath = os.path.join(
            self.outdir, (video_id or root) + str(self.frameCount) + ".jpg"
        )

        if self.annotate_frames:
            frame = draw_annotations(
                frame,
                faces,
                boxes=True,
                scores=True,
                lang=lang,
                size_multiplier=size_multiplier,
            )

        if self.save_frames:
            cv2.imwrite(imgpath, frame)

        if self.display:
            cv2.imshow("Video", frame)

        if self.save_video:
            self.videowriter.write(frame)

        self.frameCount += 1
    def analyze(
        self,
        detector,  # fer.FER instance
        display: bool = False,
        output: str = "csv",
        frequency: Optional[int] = None,
        max_results: Optional[int] = None,
        save_fps: Optional[int] = None,
        video_id: Optional[str] = None,
        save_frames: bool = True,
        save_video: bool = True,
        annotate_frames: bool = True,
        zip_images: bool = True,
        detection_box: Optional[dict] = None,
        lang: str = "en",
        include_audio: bool = False,
        size_multiplier: int = 1,
    ) -> list:
        """Recognize facial expressions in video using `detector`.

        Args:
            detector (fer.FER): facial expression recognizer
            display (bool): show images with cv2.imshow
            output (str): 'csv' or 'pandas'
            frequency (int): run inference on every nth frame (a higher number is faster)
            max_results (int): number of frames to run inference on before stopping
            save_fps (int): inference frequency = video fps // save_fps
            video_id (str): filename stem for saving frames
            save_frames (bool): save frames to the output directory
            save_video (bool): save the annotated output video
            annotate_frames (bool): add emotion labels to frames
            zip_images (bool): compress saved frames into a zip archive
            detection_box (dict): bounding box for a subimage (x_min, x_max, y_min, y_max)
            lang (str): language of the emotion labels shown on the video
            include_audio (bool): whether to also write an output video with the source audio
            size_multiplier (int): scale factor for the emotion labels drawn on the video

        Returns:
            data (list): list of results
        """
        frames_emotions = []
        if frequency is None:
            frequency = 1
        else:
            frequency = int(frequency)

        self.display = display
        self.save_frames = save_frames
        self.save_video = save_video
        self.annotate_frames = annotate_frames

        results_nr = 0

        # Open video
        assert self.cap.open(self.filepath), "Video capture not opening"
        self.__emotions = detector._get_labels().items()
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        pos_frames = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
        assert int(pos_frames) == 0, "Video not at index 0"

        self.frameCount = 0
        height, width = (
            int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        )

        fps = self.cap.get(cv2.CAP_PROP_FPS)
        length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        assert fps and length, "File {} not loaded".format(self.filepath)

        if save_fps is not None:
            frequency = int(fps // save_fps)
            log.info("Saving every {} frames".format(frequency))

        log.info(
            "{:.2f} fps, {} frames, {:.2f} seconds".format(fps, length, length / fps)
        )

        if self.save_frames:
            os.makedirs(self.outdir, exist_ok=True)
            log.info(f"Making directories at {self.outdir}")
        root, ext = os.path.splitext(os.path.basename(self.filepath))
        outfile = os.path.join(self.outdir, f"{root}_output{ext}")

        if save_video:
            self.videowriter = self._save_video(outfile, fps, width, height)

        with logging_redirect_tqdm():
            pbar = tqdm(total=length, unit="frames")

            while self.cap.isOpened():
                ret, frame = self.cap.read()
                if not ret:  # end of video
                    break

                if frame is None:
                    log.warning("Empty frame")
                    continue

                if self.frameCount % frequency != 0:
                    self.frameCount += 1
                    continue

                if detection_box is not None:
                    frame = self._crop(frame, detection_box)

                # Get faces and detect emotions; coordinates are for the unpadded frame
                try:
                    faces = detector.detect_emotions(frame)
                except Exception as e:
                    log.error(e)
                    break

                # Offset detection_box to include padding
                if detection_box is not None:
                    faces = self._offset_detection_box(faces, detection_box)

                self._increment_frames(frame, faces, video_id, root, lang, size_multiplier)

                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break

                if faces:
                    frames_emotions.append(faces)

                results_nr += 1
                if max_results and results_nr > max_results:
                    break

                pbar.update(1)

            pbar.close()
            self._close_video(outfile, save_frames, zip_images)

        if include_audio:
            audio_suffix = "_audio."
            my_audio = AudioFileClip(self.filepath)
            new_audioclip = CompositeAudioClip([my_audio])

            my_output_clip = VideoFileClip(outfile)
            my_output_clip.audio = new_audioclip
            my_output_clip.write_videofile(audio_suffix.join(outfile.rsplit(".", 1)))

        return self.to_format(frames_emotions, output)
    def to_format(self, data, format):
        """Return data in the requested format ('csv' or 'pandas')."""
        methods_lookup = {"csv": self.to_csv, "pandas": self.to_pandas}
        return methods_lookup[format](data)
    def _save_video(self, outfile: str, fps: int, width: int, height: int):
        if os.path.isfile(outfile):
            os.remove(outfile)
            log.info("Deleted pre-existing {}".format(outfile))
        if self.tempfile and os.path.isfile(self.tempfile):
            os.remove(self.tempfile)
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        videowriter = cv2.VideoWriter(
            self.tempfile or outfile, fourcc, fps, (width, height), True
        )
        return videowriter
    @staticmethod
    def _crop(frame, detection_box):
        crop_frame = frame[
            detection_box.get("y_min") : detection_box.get("y_max"),
            detection_box.get("x_min") : detection_box.get("x_max"),
        ]
        return crop_frame
    def __del__(self):
        cv2.destroyAllWindows()
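For reference, a minimal usage sketch of the deleted class. The input path is hypothetical; `FER(mtcnn=True)` follows the fer package's documented API and is not part of this file:

from fer import FER, Video

video = Video("my_video.mp4", outdir="output")
detector = FER(mtcnn=True)  # MTCNN-based face detector
raw_data = video.analyze(detector, display=False)  # list of per-frame results
df = video.to_pandas(raw_data)     # one row per analyzed frame
df = video.get_first_face(df)      # keep only the first face's columns
emotions = video.get_emotions(df)  # drop the bounding-box columns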