reab5555 committed
Commit 5565ca5 · verified · 1 Parent(s): 2d26eaf

Update app.py

Files changed (1): app.py (+105 -75)
app.py CHANGED
@@ -4,37 +4,42 @@ import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
+import torchvision
 from facenet_pytorch import InceptionResnetV1, MTCNN
 import mediapipe as mp
 from fer import FER
 from sklearn.cluster import KMeans
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
-from sklearn.metrics import silhouette_score
-from scipy.spatial.distance import cdist
 import umap
 import pandas as pd
 import matplotlib.pyplot as plt
 from matplotlib.ticker import MaxNLocator
+from moviepy.editor import VideoFileClip
+from PIL import Image
 import gradio as gr
 import tempfile
 import shutil
 import subprocess
 import fractions
 
+
 # Suppress TensorFlow warnings
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import tensorflow as tf
+
 tf.get_logger().setLevel('ERROR')
 
 # Initialize models and other global variables
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
+device = 'cuda'
 
-mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.999, 0.999, 0.999], min_face_size=100, selection_method='largest')
+mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.999, 0.999, 0.999], min_face_size=100,
+              selection_method='largest')
 model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
 mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
 emotion_detector = FER(mtcnn=False)
 
+
 def frame_to_timecode(frame_num, original_fps, desired_fps):
     total_seconds = frame_num / original_fps
     hours = int(total_seconds // 3600)
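
Note: this hunk pins device = 'cuda' and drops the CPU fallback, so on a machine without a GPU the MTCNN/InceptionResnetV1 setup above would fail at startup. For reference, a minimal sketch of the guard the old code used:

# Sketch only: the CPU fallback removed by this commit.
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'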
@@ -43,6 +48,7 @@ def frame_to_timecode(frame_num, original_fps, desired_fps):
     milliseconds = int((total_seconds - int(total_seconds)) * 1000)
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
 
+
 def get_face_embedding_and_emotion(face_img):
     face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
     face_tensor = (face_tensor - 0.5) / 0.5
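
The two-step normalization above maps uint8 pixel values into roughly the [-1, 1] range the pretrained facenet model expects. A quick standalone check:

# Worked check of the normalization in get_face_embedding_and_emotion.
import torch

pixels = torch.tensor([0.0, 127.5, 255.0])
t = pixels / 255        # -> [0.0, 0.5, 1.0]
t = (t - 0.5) / 0.5     # -> [-1.0, 0.0, 1.0]
print(t)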
@@ -58,6 +64,7 @@ def get_face_embedding_and_emotion(face_img):
 
     return embedding.cpu().numpy().flatten(), emotion_dict
 
+
 def alignFace(img):
     img_raw = img.copy()
     results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -83,53 +90,51 @@ def alignFace(img):
     new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
     return new_img
 
-def extract_frames(video_path, output_folder, fps):
+
+def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
     os.makedirs(output_folder, exist_ok=True)
-    command = [
-        'ffmpeg',
-        '-i', video_path,
-        '-vf', f'fps={fps}',
-        f'{output_folder}/frame_%04d.jpg'
-    ]
-    try:
-        result = subprocess.run(command, check=True, capture_output=True, text=True)
-        print(f"FFmpeg stdout: {result.stdout}")
-        print(f"FFmpeg stderr: {result.stderr}")
-    except subprocess.CalledProcessError as e:
-        print(f"Error extracting frames: {e}")
-        print(f"FFmpeg stdout: {e.stdout}")
-        print(f"FFmpeg stderr: {e.stderr}")
-        raise
-
-def get_video_info(video_path):
-    ffprobe_command = [
-        'ffprobe',
-        '-v', 'error',
-        '-select_streams', 'v:0',
-        '-count_packets',
-        '-show_entries', 'stream=nb_read_packets,r_frame_rate',
-        '-of', 'csv=p=0',
-        video_path
-    ]
-    ffprobe_output = subprocess.check_output(ffprobe_command, universal_newlines=True).strip().split(',')
-    frame_rate, frame_count = ffprobe_output
-
-    frac = fractions.Fraction(frame_rate)
-    original_fps = float(frac.numerator) / float(frac.denominator)
-    frame_count = int(frame_count)
-
+
+    # Load the video clip
+    clip = VideoFileClip(video_path)
+
+    original_fps = clip.fps
+    duration = clip.duration
+    total_frames = int(duration * original_fps)
+    step = max(1, original_fps / desired_fps)
+    total_frames_to_extract = int(total_frames / step)
+
+    frame_count = 0
+    for t in np.arange(0, duration, step / original_fps):
+        # Get the frame at time t
+        frame = clip.get_frame(t)
+
+        # Convert the frame to PIL Image and save it
+        img = Image.fromarray(frame)
+        img.save(os.path.join(output_folder, f"frame_{frame_count:04d}.jpg"))
+
+        frame_count += 1
+
+        # Report progress
+        if progress_callback:
+            progress = frame_count / total_frames_to_extract
+            progress_callback(progress, f"Extracting frame {frame_count} of {total_frames_to_extract}")
+
+        if frame_count >= total_frames_to_extract:
+            break
+
+    clip.close()
     return frame_count, original_fps
 
 def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
     embeddings_by_frame = {}
     emotions_by_frame = {}
     frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
-
+
     for i in range(0, len(frame_files), batch_size):
-        batch_files = frame_files[i:i+batch_size]
+        batch_files = frame_files[i:i + batch_size]
         batch_frames = []
         batch_nums = []
-
+
         for frame_file in batch_files:
            frame_num = int(frame_file.split('_')[1].split('.')[0])
            frame_path = os.path.join(frames_folder, frame_file)
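
The moviepy-based extract_frames above samples the clip at fixed timestamps instead of shelling out to ffmpeg. A standalone sketch of the sampling arithmetic, with assumed example numbers:

# Sketch only: the subsampling math, with example values.
original_fps = 30.0
desired_fps = 20
step = max(1, original_fps / desired_fps)  # 1.5 source frames per saved frame
dt = step / original_fps                   # 0.05 s, i.e. 1 / desired_fps
# clip.get_frame(t) is therefore sampled every 1/desired_fps seconds
# whenever desired_fps <= original_fps.
print(step, dt)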
@@ -137,12 +142,13 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
             if frame is not None:
                 batch_frames.append(frame)
                 batch_nums.append(frame_num)
-
+
         if batch_frames:
             # Detect faces in batch
             batch_boxes, batch_probs = mtcnn.detect(batch_frames)
-
-            for j, (frame, frame_num, boxes, probs) in enumerate(zip(batch_frames, batch_nums, batch_boxes, batch_probs)):
+
+            for j, (frame, frame_num, boxes, probs) in enumerate(
+                    zip(batch_frames, batch_nums, batch_boxes, batch_probs)):
                 if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
                     x1, y1, x2, y2 = [int(b) for b in boxes[0]]
                     face = frame[y1:y2, x1:x2]
@@ -155,11 +161,13 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
                     embedding, emotion = get_face_embedding_and_emotion(aligned_face_resized)
                     embeddings_by_frame[frame_num] = embedding
                     emotions_by_frame[frame_num] = emotion
-
-        progress((i + len(batch_files)) / frame_count, f"Processing frames {i + 1} to {min(i + len(batch_files), frame_count)} of {frame_count}")
+
+        progress((i + len(batch_files)) / frame_count,
+                 f"Processing frames {i + 1} to {min(i + len(batch_files), frame_count)} of {frame_count}")
 
     return embeddings_by_frame, emotions_by_frame
 
+
 def cluster_embeddings(embeddings):
     if len(embeddings) < 2:
         print("Not enough embeddings for clustering. Assigning all to one cluster.")
@@ -171,6 +179,7 @@ def cluster_embeddings(embeddings):
     clusters = kmeans.fit_predict(embeddings_scaled)
     return clusters
 
+
 def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
     for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
         person_folder = os.path.join(organized_faces_folder, f"person_{cluster}")
@@ -179,7 +188,9 @@ def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
         dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
         shutil.copy(src, dst)
 
-def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder, num_components):
+
+def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
+                            num_components):
     emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
     person_data = {}
 
@@ -224,6 +235,7 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder, num_components):
 
     return df, largest_cluster
 
+
 class LSTMAutoencoder(nn.Module):
     def __init__(self, input_size, hidden_size=64, num_layers=2):
         super(LSTMAutoencoder, self).__init__()
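
The hunks only graze the LSTMAutoencoder definition. A minimal sketch consistent with the visible signature and the out = self.fc(outputs) line (the exact layer layout is an assumption):

# Sketch only: an LSTM whose hidden states are projected back to the input size,
# so per-timestep reconstruction error can serve as an anomaly score.
import torch
import torch.nn as nn

class LSTMAutoencoderSketch(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x):          # x: (batch, seq_len, input_size)
        outputs, _ = self.lstm(x)  # (batch, seq_len, hidden_size)
        return self.fc(outputs)    # (batch, seq_len, input_size)

out = LSTMAutoencoderSketch(input_size=8)(torch.randn(1, 100, 8))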
@@ -239,6 +251,7 @@ class LSTMAutoencoder(nn.Module):
         out = self.fc(outputs)
         return out
 
+
 def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
@@ -287,9 +300,10 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
     # Compute anomalies for components only
     component_columns = [col for col in feature_columns if col.startswith('Comp')]
     component_indices = [feature_columns.index(col) for col in component_columns]
-
+
     if len(component_indices) > 0:
-        mse_comp = np.mean(np.power(X.squeeze(0).cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
+        mse_comp = np.mean(
+            np.power(X.squeeze(0).cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
     else:
         mse_comp = mse_all  # If no components, use all features
 
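
The component-only anomaly score above is per-timestep reconstruction MSE restricted to the 'Comp*' columns. A self-contained sketch with assumed shapes:

# Sketch only: reconstruction error over selected feature columns.
import numpy as np

T, F = 100, 10                       # timesteps, features (assumed)
original = np.random.rand(T, F)      # stand-in for X.squeeze(0).cpu().numpy()
reconstructed = np.random.rand(T, F)
component_indices = [0, 1, 2]        # columns whose names start with 'Comp'

mse = np.mean((original[:, component_indices] - reconstructed[:, component_indices]) ** 2, axis=1)
top = np.argsort(mse)[-10:]          # the 10 highest-error timesteps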
@@ -297,10 +311,11 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
     anomalies_comp = np.zeros(len(mse_comp), dtype=bool)
     anomalies_comp[top_indices_comp] = True
 
-    return (anomalies_all, mse_all, top_indices_all,
-            anomalies_comp, mse_comp, top_indices_comp,
+    return (anomalies_all, mse_all, top_indices_all,
+            anomalies_comp, mse_comp, top_indices_comp,
             model)
 
+
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
     fig, ax = plt.subplots(figsize=(16, 8))
     bars = ax.bar(range(len(df)), anomaly_scores, width=0.8, color='skyblue')
@@ -311,10 +326,12 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
     ax.set_title(f'Anomaly Scores Over Time ({title})')
     ax.xaxis.set_major_locator(MaxNLocator(nbins=100))
     ticks = ax.get_xticks()
-    ax.set_xticklabels([df['Timecode'].iloc[int(tick)] if tick >= 0 and tick < len(df) else '' for tick in ticks], rotation=90, ha='right')
+    ax.set_xticklabels([df['Timecode'].iloc[int(tick)] if tick >= 0 and tick < len(df) else '' for tick in ticks],
+                       rotation=90, ha='right')
     plt.tight_layout()
     return fig
 
+
 def plot_emotion(df, emotion, num_anomalies, color):
     fig, ax = plt.subplots(figsize=(16, 8))
     values = df[emotion].values
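
A side note on the tick-label pattern used in both plot functions: overriding labels obtained from get_xticks() works, but newer matplotlib releases warn unless the tick positions are pinned first. A hedged sketch of the quieter equivalent:

# Sketch only: fix tick positions before replacing their labels.
import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame({'Timecode': [f"00:00:{i:02d}.000" for i in range(60)]})  # stand-in
fig, ax = plt.subplots()
ax.bar(range(len(df)), range(len(df)))
ticks = [int(t) for t in ax.get_xticks() if 0 <= t < len(df)]
ax.set_xticks(ticks)
ax.set_xticklabels([df['Timecode'].iloc[t] for t in ticks], rotation=90, ha='right')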
@@ -327,12 +344,15 @@ def plot_emotion(df, emotion, num_anomalies, color):
     ax.set_title(f'{emotion.capitalize()} Anomalies Over Time (Top {num_anomalies} in Red)')
     ax.xaxis.set_major_locator(MaxNLocator(nbins=100))
     ticks = ax.get_xticks()
-    ax.set_xticklabels([df['Timecode'].iloc[int(tick)] if tick >= 0 and tick < len(df) else '' for tick in ticks], rotation=90, ha='right')
+    ax.set_xticklabels([df['Timecode'].iloc[int(tick)] if tick >= 0 and tick < len(df) else '' for tick in ticks],
+                       rotation=90, ha='right')
     plt.tight_layout()
     return fig
 
+
 import base64
 
+
 def get_random_face_sample(organized_faces_folder, largest_cluster, output_folder):
     person_folder = os.path.join(organized_faces_folder, f"person_{largest_cluster}")
     face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
@@ -340,34 +360,40 @@ def get_random_face_sample(organized_faces_folder, largest_cluster, output_folder):
         random_face = np.random.choice(face_files)
         face_path = os.path.join(person_folder, random_face)
         output_path = os.path.join(output_folder, "random_face_sample.jpg")
-
+
         # Read the image and resize it to be smaller
         face_img = cv2.imread(face_path)
         small_face = cv2.resize(face_img, (100, 100))  # Resize to NxN pixels
         cv2.imwrite(output_path, small_face)
-
+
         return output_path
     return None
 
+
 def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
-
+
     with tempfile.TemporaryDirectory() as temp_dir:
         aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
         organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
         os.makedirs(aligned_faces_folder, exist_ok=True)
         os.makedirs(organized_faces_folder, exist_ok=True)
 
-        progress(0.1, "Extracting frames")
+        progress(0.05, "Starting frame extraction")
         frames_folder = os.path.join(temp_dir, 'extracted_frames')
-        extract_frames(video_path, frames_folder, desired_fps)
 
-        progress(0.2, "Getting video info")
-        frame_count, original_fps = get_video_info(video_path)
+        def extraction_progress(percent, message):
+            # Adjust the progress to fit within the 5% to 30% range of the overall process
+            overall_progress = 0.05 + (percent * 0.25)
+            progress(overall_progress, message)
+
+        frame_count, original_fps = extract_frames(video_path, frames_folder, desired_fps, extraction_progress)
+
 
         progress(0.3, "Processing frames")
-        embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size)
+        embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count,
+                                                                progress, batch_size)
 
         if not embeddings_by_frame:
             return "No faces were extracted from the video.", None, None, None, None, None, None
@@ -380,15 +406,18 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
         organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
 
         progress(0.8, "Saving person data")
-        df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, temp_dir, num_components)
+        df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
+                                                      original_fps, temp_dir, num_components)
 
         progress(0.9, "Performing anomaly detection")
-        feature_columns = [col for col in df.columns if col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
+        feature_columns = [col for col in df.columns if
+                           col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
         X = df[feature_columns].values
         print(f"Shape of input data: {X.shape}")
         print(f"Feature columns: {feature_columns}")
         try:
-            anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
+            anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
+                X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
         except Exception as e:
             print(f"Error details: {str(e)}")
             print(f"X shape: {X.shape}")
@@ -410,38 +439,39 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
         except Exception as e:
             return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None, None
 
-
         # Get a random face sample
         face_sample = get_random_face_sample(organized_faces_folder, largest_cluster, output_folder)
-
+
         progress(1.0, "Preparing results")
         results = f"Top {num_anomalies} anomalies (All Features):\n"
-        results += "\n".join([f"{score:.4f} at {timecode}" for score, timecode in
+        results += "\n".join([f"{score:.4f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_all[top_indices_all], df['Timecode'].iloc[top_indices_all].values)])
         results += f"\n\nTop {num_anomalies} anomalies (Components Only):\n"
-        results += "\n".join([f"{score:.4f} at {timecode}" for score, timecode in
+        results += "\n".join([f"{score:.4f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_comp[top_indices_comp], df['Timecode'].iloc[top_indices_comp].values)])
 
         for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
             top_indices = np.argsort(df[emotion].values)[-num_anomalies:][::-1]
             results += f"\n\nTop {num_anomalies} {emotion.capitalize()} Scores:\n"
             results += "\n".join([f"{df[emotion].iloc[i]:.4f} at {df['Timecode'].iloc[i]}" for i in top_indices])
-
+
         return (
             results,  # Text results to a Textbox
             face_sample,  # Random face sample image
             anomaly_plot_all,
             anomaly_plot_comp,
             *emotion_plots
-        )
-
-# Gradio interface
+        )
+
+
+# Gradio interface
+
+
 iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
         gr.Slider(minimum=1, maximum=20, step=1, value=5, label="Number of Anomalies"),
-        gr.Slider(minimum=2, maximum=5, step=1, value=3, label="Number of Components"),
+        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
         gr.Slider(minimum=1, maximum=30, step=1, value=20, label="Desired FPS"),
         gr.Slider(minimum=1, maximum=64, step=1, value=16, label="Batch Size")
     ],
@@ -461,7 +491,7 @@ iface = gr.Interface(
     description="""
     This application detects anomalies in facial expressions and emotions from a video input.
     It focuses on the most frequently appearing person in the video for analysis.
-
+
     Adjust the parameters as needed:
     - Number of Anomalies: How many top anomalies or high intensities to highlight
     - Number of Components: Complexity of the facial expression model
@@ -471,4 +501,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
 