Create app.py
app.py
ADDED
@@ -0,0 +1,1312 @@
import gradio as gr
import cv2
import numpy as np
import pandas as pd
import time
import dlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.collections import LineCollection
import os
import datetime
import tempfile
from typing import Dict, List, Tuple, Optional, Union, Any
import google.generativeai as genai
from PIL import Image
import json
import warnings
from deepface import DeepFace
import base64
import io
from pathlib import Path

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')

# --- Constants ---
VIDEO_FPS = 30  # Target FPS for saved video
CSV_FILENAME_TEMPLATE = "facial_analysis_{timestamp}.csv"
VIDEO_FILENAME_TEMPLATE = "processed_{timestamp}.mp4"
TEMP_DIR = Path("temp_frames")
TEMP_DIR.mkdir(exist_ok=True)

# --- Configure Google Gemini API ---
print("Configuring Google Gemini API...")
try:
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY environment variable not set.")

    genai.configure(api_key=GOOGLE_API_KEY)
    # Use gemini-2.0-flash for quick responses
    model = genai.GenerativeModel('gemini-2.0-flash')
    GEMINI_ENABLED = True
    print("Google Gemini API configured successfully.")
except Exception as e:
    print(f"WARNING: Failed to configure Google Gemini API: {e}")
    print("Running with simulated Gemini API responses.")
    GEMINI_ENABLED = False

# --- Initialize dlib and DeepFace for facial analysis ---
print("Initializing dlib face detector and shape predictor...")
try:
    # Initialize dlib's face detector and facial landmark predictor
    face_detector = dlib.get_frontal_face_detector()

    # Path to the shape predictor model file. Download it from:
    # http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    predictor_path = "shape_predictor_68_face_landmarks.dat"

    # Check if the predictor file exists, otherwise inform the user
    if not os.path.exists(predictor_path):
        print(f"WARNING: {predictor_path} not found. Please download from:")
        print("http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        print("Extract and place it in the current directory.")
        # Use a placeholder or alternative
        shape_predictor = None
    else:
        shape_predictor = dlib.shape_predictor(predictor_path)

    print("dlib face detector initialized successfully.")
except Exception as e:
    print(f"ERROR initializing dlib face detector: {e}")
    face_detector = None
    shape_predictor = None

# --- Metrics Definition ---
metrics = [
    "valence", "arousal", "dominance", "cognitive_load",
    "emotional_stability", "openness", "agreeableness",
    "neuroticism", "conscientiousness", "extraversion",
    "stress_index", "engagement_level"
]

# DeepFace emotion mapping
emotion_mapping = {
    "angry": {"valence": 0.2, "arousal": 0.8, "dominance": 0.7},
    "disgust": {"valence": 0.2, "arousal": 0.6, "dominance": 0.5},
    "fear": {"valence": 0.2, "arousal": 0.8, "dominance": 0.3},
    "happy": {"valence": 0.9, "arousal": 0.7, "dominance": 0.6},
    "sad": {"valence": 0.3, "arousal": 0.4, "dominance": 0.3},
    "surprise": {"valence": 0.6, "arousal": 0.9, "dominance": 0.5},
    "neutral": {"valence": 0.5, "arousal": 0.5, "dominance": 0.5}
}

ad_context_columns = ["ad_description", "ad_detail", "ad_type", "gemini_ad_analysis"]
user_state_columns = ["user_state", "enhanced_user_state"]
all_columns = ['timestamp', 'frame_number'] + metrics + ad_context_columns + user_state_columns
initial_metrics_df = pd.DataFrame(columns=all_columns)
# --- Gemini API Functions ---

def call_gemini_api_for_ad(description, detail, ad_type):
    """
    Uses Google Gemini to analyze ad context.
    """
    print(f"Analyzing ad context: '{description}' ({ad_type})")

    if not GEMINI_ENABLED:
        # Simulated response
        analysis = f"Simulated analysis: Ad='{description or 'N/A'}' ({ad_type}), Focus='{detail or 'N/A'}'."
        if not description and not detail:
            analysis = "No ad context provided."
        print(f"Simulated Gemini Result: {analysis}")
        return analysis
    else:
        try:
            prompt = f"""
            Please analyze this advertisement context:
            - Description: {description}
            - Detail focus: {detail}
            - Type/Genre: {ad_type}

            Provide a concise analysis of what emotional and cognitive responses might be expected from viewers.
            Limit your response to 100 words.
            """

            response = model.generate_content(prompt)
            return response.text
        except Exception as e:
            print(f"Error calling Gemini for ad context: {e}")
            return f"Error analyzing ad context: {str(e)}"

def interpret_metrics_with_gemini(metrics_dict, ad_context=None):
    """
    Uses Google Gemini to interpret facial metrics and determine user state.
    """
    if not metrics_dict:
        return "No metrics", "No facial data detected"

    if not GEMINI_ENABLED:
        # Basic rule-based simulation for user state
        valence = metrics_dict.get('valence', 0.5)
        arousal = metrics_dict.get('arousal', 0.5)
        cog_load = metrics_dict.get('cognitive_load', 0.5)
        stress = metrics_dict.get('stress_index', 0.5)
        engagement = metrics_dict.get('engagement_level', 0.5)

        # Simple rule-based simulation
        state = "Neutral"
        if valence > 0.65 and arousal > 0.55 and engagement > 0.6:
            state = "Positive, Engaged"
        elif valence < 0.4 and stress > 0.6:
            state = "Stressed, Negative"
        elif cog_load > 0.7 and engagement < 0.4:
            state = "Confused, Disengaged"
        elif arousal < 0.4 and engagement < 0.5:
            state = "Calm, Passive"

        enhanced_state = f"The viewer appears {state.lower()} while watching this content. They are likely not fully connecting with the message."

        return state, enhanced_state
    else:
        try:
            # Format metrics for Gemini
            metrics_formatted = "\n".join([f"- {k.replace('_', ' ').title()}: {v:.2f}" for k, v in metrics_dict.items()
                                           if k not in ('timestamp', 'frame_number')])

            # Include ad context if available
            ad_info = ""
            if ad_context:
                ad_desc = ad_context.get('ad_description', 'N/A')
                ad_type = ad_context.get('ad_type', 'N/A')
                ad_info = f"\nThey are watching an advertisement: {ad_desc} (Type: {ad_type})"

            prompt = f"""
            Analyze these facial metrics (scale 0-1) of a person watching an advertisement{ad_info}:
            {metrics_formatted}

            Provide two outputs:
            1. User State: A short 1-3 word description of their emotional/cognitive state
            2. Enhanced Analysis: A detailed 1-2 sentence interpretation of their reaction to the content

            Format as JSON: {{"user_state": "STATE", "enhanced_user_state": "DETAILED ANALYSIS"}}
            """

            response = model.generate_content(prompt)

            try:
                # Try to parse as JSON
                result = json.loads(response.text)
                return result.get("user_state", "Uncertain"), result.get("enhanced_user_state", "Analysis unavailable")
            except json.JSONDecodeError:
                # If not valid JSON, try to extract manually
                text = response.text
                if "user_state" in text and "enhanced_user_state" in text:
                    parts = text.split("enhanced_user_state")
                    user_state = parts[0].split("user_state")[1].replace('"', '').replace(':', '').replace(',', '').strip()
                    enhanced = parts[1].replace('"', '').replace(':', '').replace('}', '').strip()
                    return user_state, enhanced
                else:
                    # Just return the raw text as enhanced state
                    return "Analyzed", text

        except Exception as e:
            print(f"Error calling Gemini for metric interpretation: {e}")
            return "Error", f"Error analyzing facial metrics: {str(e)}"
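# Illustrative sketch (not called anywhere in the app): exercising the two Gemini helpers
# above offline. With GEMINI_ENABLED False the call falls back to the rule-based simulation,
# so no API key is needed; the metric values and ad text below are invented for the example.
def _example_gemini_interpretation():
    fake_metrics = {m: 0.5 for m in metrics}
    fake_metrics.update({"valence": 0.8, "arousal": 0.7, "engagement_level": 0.75})
    ad_ctx = {"ad_description": "Upbeat soda ad", "ad_type": "Funny"}
    state, enhanced = interpret_metrics_with_gemini(fake_metrics, ad_ctx)
    print(state, "|", enhanced)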
# --- Facial Analysis Functions with dlib and DeepFace ---

def extract_face_landmarks_dlib(image):
    """Extract facial landmarks using dlib"""
    if image is None or face_detector is None or shape_predictor is None:
        return None

    try:
        # Convert to grayscale for dlib
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Detect faces
        faces = face_detector(gray, 0)

        if len(faces) == 0:
            return None

        # Get the largest face by area
        largest_face = faces[0]
        largest_area = (faces[0].right() - faces[0].left()) * (faces[0].bottom() - faces[0].top())

        for face in faces:
            area = (face.right() - face.left()) * (face.bottom() - face.top())
            if area > largest_area:
                largest_face = face
                largest_area = area

        # Get facial landmarks
        landmarks = shape_predictor(gray, largest_face)

        # Return both the face detection rectangle and landmarks
        return {"rect": largest_face, "landmarks": landmarks}

    except Exception as e:
        print(f"Error in dlib landmark extraction: {e}")
        return None

def analyze_face_with_deepface(image):
    """Analyze facial emotions using DeepFace"""
    if image is None:
        return None

    try:
        # Convert to RGB for DeepFace if needed
        if len(image.shape) == 3 and image.shape[2] == 3:
            # Check if BGR and convert to RGB if needed
            if np.mean(image[:, :, 0]) < np.mean(image[:, :, 2]):  # Rough BGR check
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            else:
                image_rgb = image
        else:
            # Handle grayscale or other formats
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Save image to temp file (DeepFace sometimes works better with files)
        temp_img = f"temp_frames/temp_analysis_{time.time()}.jpg"
        cv2.imwrite(temp_img, image_rgb)

        # Analyze with DeepFace
        analysis = DeepFace.analyze(
            img_path=temp_img,
            actions=['emotion'],
            enforce_detection=False,  # Don't throw error if face not detected
            detector_backend='opencv'  # Faster detection
        )

        # Remove temporary file
        try:
            os.remove(temp_img)
        except OSError:
            pass

        # Return the first face analysis (assuming single face)
        if isinstance(analysis, list) and len(analysis) > 0:
            return analysis[0]
        else:
            return analysis

    except Exception as e:
        print(f"DeepFace analysis error: {e}")
        return None
def calculate_ear_dlib(landmarks):
    """Calculate Eye Aspect Ratio using dlib landmarks"""
    if landmarks is None:
        return 0.0

    try:
        # dlib's 68-point face model landmark indices
        # Left eye: 36-41, Right eye: 42-47
        LEFT_EYE = range(36, 42)
        RIGHT_EYE = range(42, 48)

        def get_eye_aspect_ratio(eye_points):
            # Compute the euclidean distances between the two sets of vertical landmarks
            v1 = np.linalg.norm(eye_points[1] - eye_points[5])
            v2 = np.linalg.norm(eye_points[2] - eye_points[4])
            # Compute the euclidean distance between the horizontal landmarks
            h = np.linalg.norm(eye_points[0] - eye_points[3])
            # Compute the eye aspect ratio
            return (v1 + v2) / (2.0 * h) if h > 1e-6 else 0.0

        # Extract landmark coordinates
        landmark_coords = np.array([[landmarks.part(i).x, landmarks.part(i).y] for i in range(68)])

        # Calculate EAR for left and right eyes
        left_eye_coords = landmark_coords[list(LEFT_EYE)]
        right_eye_coords = landmark_coords[list(RIGHT_EYE)]

        left_ear = get_eye_aspect_ratio(left_eye_coords)
        right_ear = get_eye_aspect_ratio(right_eye_coords)

        # Return average of both eyes
        return (left_ear + right_ear) / 2.0

    except Exception as e:
        print(f"Error calculating EAR: {e}")
        return 0.0
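# A minimal sanity-check sketch of the EAR geometry used above, on hand-made eye points
# rather than dlib output (all coordinates here are invented). EAR = (|p2-p6| + |p3-p5|) / (2*|p1-p4|);
# an open eye yields a noticeably larger ratio than a nearly closed one.
def _example_ear_geometry():
    open_eye = np.array([[0, 0], [2, 3], [4, 3], [6, 0], [4, -3], [2, -3]], dtype=float)
    closed_eye = np.array([[0, 0], [2, 0.4], [4, 0.4], [6, 0], [4, -0.4], [2, -0.4]], dtype=float)

    def ear(pts):
        v1 = np.linalg.norm(pts[1] - pts[5])
        v2 = np.linalg.norm(pts[2] - pts[4])
        h = np.linalg.norm(pts[0] - pts[3])
        return (v1 + v2) / (2.0 * h)

    print(f"open eye EAR: {ear(open_eye):.2f}, closed eye EAR: {ear(closed_eye):.2f}")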
def calculate_mar_dlib(landmarks):
    """Calculate Mouth Aspect Ratio using dlib landmarks"""
    if landmarks is None:
        return 0.0

    try:
        # dlib's 68-point face model landmark indices for mouth
        # Mouth outer: 48-59, Mouth inner: 60-67
        MOUTH_OUTER = range(48, 60)
        MOUTH_INNER = range(60, 68)

        # Extract landmark coordinates
        landmark_coords = np.array([[landmarks.part(i).x, landmarks.part(i).y] for i in range(68)])

        # Use specific points for vertical and horizontal measurements
        # Vertical: distance between top and bottom lips
        top_lip = landmark_coords[51]  # Top lip center
        bottom_lip = landmark_coords[57]  # Bottom lip center
        vertical = np.linalg.norm(top_lip - bottom_lip)

        # Horizontal: distance between mouth corners
        left_corner = landmark_coords[48]  # Left mouth corner
        right_corner = landmark_coords[54]  # Right mouth corner
        horizontal = np.linalg.norm(left_corner - right_corner)

        # Calculate ratio
        return vertical / horizontal if horizontal > 1e-6 else 0.0

    except Exception as e:
        print(f"Error calculating MAR: {e}")
        return 0.0

def calculate_eyebrow_position_dlib(landmarks):
    """Calculate eyebrow position using dlib landmarks"""
    if landmarks is None:
        return 0.0

    try:
        # dlib's 68-point face model landmark indices
        # Left eyebrow: 17-21, Right eyebrow: 22-26
        # Left eye: 36-41, Right eye: 42-47
        L_BROW_C = 19  # Center of left eyebrow
        R_BROW_C = 24  # Center of right eyebrow
        L_EYE_C = 37  # Upper point of left eye
        R_EYE_C = 43  # Upper point of right eye

        # Extract landmark coordinates
        landmark_coords = np.array([[landmarks.part(i).x, landmarks.part(i).y] for i in range(68)])

        # Calculate distances between eyebrows and eyes
        l_brow_y = landmark_coords[L_BROW_C][1]
        r_brow_y = landmark_coords[R_BROW_C][1]
        l_eye_y = landmark_coords[L_EYE_C][1]
        r_eye_y = landmark_coords[R_EYE_C][1]

        # Calculate vertical distances (a larger value means the eyebrows sit higher above the eyes, i.e. are raised)
        l_dist = l_eye_y - l_brow_y
        r_dist = r_eye_y - r_brow_y

        # Average the distances and normalize
        avg_dist = (l_dist + r_dist) / 2.0
        # Approximate normalization based on typical face proportions
        # Higher value means eyebrows are raised more
        norm = (avg_dist - 5) / 15  # Adjusted for typical pixel distances

        return max(0.0, min(1.0, norm))

    except Exception as e:
        print(f"Error calculating Eyebrow Position: {e}")
        return 0.0

def estimate_head_pose_dlib(landmarks):
    """Estimate head pose using dlib landmarks"""
    if landmarks is None:
        return 0.0, 0.0

    try:
        # dlib's 68-point face model landmark indices
        NOSE_TIP = 30  # Nose tip
        LEFT_EYE_C = 37  # Left eye (upper) point
        RIGHT_EYE_C = 44  # Right eye (upper) point

        # Extract landmark coordinates
        landmark_coords = np.array([[landmarks.part(i).x, landmarks.part(i).y] for i in range(68)])

        # Get key points
        nose_pt = landmark_coords[NOSE_TIP]
        l_eye_pt = landmark_coords[LEFT_EYE_C]
        r_eye_pt = landmark_coords[RIGHT_EYE_C]

        # Calculate eye midpoint
        eye_mid_x = (l_eye_pt[0] + r_eye_pt[0]) / 2.0
        eye_mid_y = (l_eye_pt[1] + r_eye_pt[1]) / 2.0

        # Calculate tilt
        v_tilt = nose_pt[1] - eye_mid_y  # Vertical tilt
        h_tilt = nose_pt[0] - eye_mid_x  # Horizontal tilt

        # Normalize based on typical facial proportions
        v_tilt_norm = v_tilt / 30.0  # Approximate normalization
        h_tilt_norm = h_tilt / 20.0  # Approximate normalization

        # Clip to range [-1, 1]
        v_tilt_norm = max(-1.0, min(1.0, v_tilt_norm))
        h_tilt_norm = max(-1.0, min(1.0, h_tilt_norm))

        return v_tilt_norm, h_tilt_norm

    except Exception as e:
        print(f"Error estimating Head Pose: {e}")
        return 0.0, 0.0
def calculate_metrics_enhanced(facial_data, deepface_data=None, ad_context=None):
    """
    Calculate facial metrics using a combination of dlib landmarks and DeepFace emotions.
    This provides a more robust approach by integrating both geometric and deep learning methods.
    """
    if ad_context is None:
        ad_context = {}

    # Initialize default metrics
    default_metrics = {m: 0.5 for m in metrics}

    # If no facial data, return defaults
    if not facial_data:
        return default_metrics

    # Extract landmarks from facial data
    landmarks = facial_data.get("landmarks")

    # If we have DeepFace data, use it to influence our metrics
    emotion_weights = None
    dominant_emotion = None

    if deepface_data and "emotion" in deepface_data:
        emotion_weights = deepface_data["emotion"]
        # Find dominant emotion
        dominant_emotion = max(emotion_weights.items(), key=lambda x: x[1])[0]

    # Calculate base geometric features if landmarks are available
    ear = calculate_ear_dlib(landmarks) if landmarks else 0.2
    mar = calculate_mar_dlib(landmarks) if landmarks else 0.5
    eb_pos = calculate_eyebrow_position_dlib(landmarks) if landmarks else 0.5
    v_tilt, h_tilt = estimate_head_pose_dlib(landmarks) if landmarks else (0.0, 0.0)

    # Combine geometric features with emotion weights

    # Step 1: Start with default metrics
    calculated_metrics = default_metrics.copy()

    # Step 2: Update based on geometric features
    cl = max(0, min(1, 1.0 - ear * 2.5))  # Cognitive load: higher when eyes are more closed

    # Step 3: If we have emotion data from DeepFace, incorporate it
    if dominant_emotion and emotion_weights:
        # Get base values from emotion mapping
        base_vals = emotion_mapping.get(dominant_emotion, {"valence": 0.5, "arousal": 0.5, "dominance": 0.5})

        # Calculate confidence-weighted emotion values
        confidence = emotion_weights.get(dominant_emotion, 0) / 100.0  # Convert percentage to 0-1

        # Combine geometric and emotion-based metrics with a weighted approach
        val = base_vals["valence"] * confidence + (mar * 0.7 * (1.0 - eb_pos) * 0.3) * (1 - confidence)
        arsl = base_vals["arousal"] * confidence + ((mar + (1.0 - ear) + eb_pos) / 3.0) * (1 - confidence)
        dom = base_vals["dominance"] * confidence + (0.5 + v_tilt) * (1 - confidence)
    else:
        # Fall back to geometric features only
        val = max(0, min(1, mar * 2.0 * (1.0 - eb_pos)))
        arsl = max(0, min(1, (mar + (1.0 - ear) + eb_pos) / 3.0))
        dom = max(0, min(1, 0.5 + v_tilt))

    # Illustrative context adjustments from the ad
    ad_type = ad_context.get('ad_type', 'Unknown')
    gem_txt = str(ad_context.get('gemini_ad_analysis', '')).lower()

    # Adjust based on ad context
    val_adj = 0.1 if ad_type == 'Funny' or 'humor' in gem_txt else 0.0
    arsl_adj = 0.1 if ad_type == 'Action' or 'exciting' in gem_txt else 0.0

    # Apply adjustments
    val = max(0, min(1, val + val_adj))
    arsl = max(0, min(1, arsl + arsl_adj))

    # Calculate secondary metrics
    neur = max(0, min(1, (cl * 0.6) + ((1.0 - val) * 0.4)))
    em_stab = 1.0 - neur
    extr = max(0, min(1, (arsl * 0.5) + (val * 0.5)))
    openness = max(0, min(1, 0.5 + ((mar - 0.5) * 0.5)))  # renamed from `open` to avoid shadowing the builtin
    agree = max(0, min(1, (val * 0.7) + ((1.0 - arsl) * 0.3)))
    consc = max(0, min(1, (1.0 - abs(arsl - 0.5)) * 0.7 + (em_stab * 0.3)))
    stress = max(0, min(1, (cl * 0.5) + (eb_pos * 0.3) + ((1.0 - val) * 0.2)))
    engag = max(0, min(1, (arsl * 0.7) + ((1.0 - abs(h_tilt)) * 0.3)))

    # Update the metrics dictionary
    calculated_metrics.update({
        'valence': val,
        'arousal': arsl,
        'dominance': dom,
        'cognitive_load': cl,
        'emotional_stability': em_stab,
        'openness': openness,
        'agreeableness': agree,
        'neuroticism': neur,
        'conscientiousness': consc,
        'extraversion': extr,
        'stress_index': stress,
        'engagement_level': engag
    })

    return calculated_metrics
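# Sketch of the emotion/geometry blend performed above, assuming a DeepFace-style result
# whose 'emotion' field holds percentage scores (the numbers here are fabricated). With no
# landmarks present, the geometric fallbacks (ear=0.2, mar=0.5, eb_pos=0.5) drive the blend.
def _example_metric_blend():
    fake_deepface = {"emotion": {"happy": 80.0, "neutral": 15.0, "sad": 5.0}}
    fake_facial_data = {"rect": None, "landmarks": None}
    blended = calculate_metrics_enhanced(fake_facial_data, fake_deepface, {"ad_type": "Funny"})
    print({k: round(v, 2) for k, v in blended.items()})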
def update_metrics_visualization(metrics_values):
    """Create a visualization of metrics"""
    if not metrics_values:
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.text(0.5, 0.5, "Waiting for facial metrics...", ha='center', va='center')
        ax.axis('off')
        fig.patch.set_facecolor('#FFFFFF')
        ax.set_facecolor('#FFFFFF')
        return fig

    # Filter out non-metric keys
    filtered_metrics = {k: v for k, v in metrics_values.items()
                        if k in metrics and isinstance(v, (int, float))}

    if not filtered_metrics:
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.text(0.5, 0.5, "No valid metrics available", ha='center', va='center')
        ax.axis('off')
        return fig

    num_metrics = len(filtered_metrics)
    nrows = (num_metrics + 2) // 3
    fig, axs = plt.subplots(nrows, 3, figsize=(10, nrows * 2.5), facecolor='#FFFFFF')
    axs = axs.flatten()

    colors = [(0.1, 0.1, 0.9), (0.9, 0.9, 0.1), (0.9, 0.1, 0.1)]
    cmap = LinearSegmentedColormap.from_list("custom_cmap", colors, N=100)
    norm = plt.Normalize(0, 1)
    metric_idx = 0

    for key, value in filtered_metrics.items():
        value = max(0.0, min(1.0, value))  # Clip value for safety

        ax = axs[metric_idx]
        ax.set_title(key.replace('_', ' ').title(), fontsize=10)
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 0.5)
        ax.set_aspect('equal')
        ax.axis('off')
        ax.set_facecolor('#FFFFFF')

        r = 0.4
        theta = np.linspace(np.pi, 0, 100)
        x_bg = 0.5 + r * np.cos(theta)
        y_bg = 0.1 + r * np.sin(theta)
        ax.plot(x_bg, y_bg, 'k-', linewidth=3, alpha=0.2)

        value_angle = np.pi * (1 - value)
        num_points = max(2, int(100 * value))
        value_theta = np.linspace(np.pi, value_angle, num_points)
        x_val = 0.5 + r * np.cos(value_theta)
        y_val = 0.1 + r * np.sin(value_theta)

        if len(x_val) > 1:
            points = np.array([x_val, y_val]).T.reshape(-1, 1, 2)
            segments = np.concatenate([points[:-1], points[1:]], axis=1)
            segment_values = np.linspace(0, value, len(segments))
            lc = LineCollection(segments, cmap=cmap, norm=norm)
            lc.set_array(segment_values)
            lc.set_linewidth(5)
            ax.add_collection(lc)

        ax.text(0.5, 0.15, f"{value:.2f}", ha='center', va='center', fontsize=11,
                fontweight='bold', bbox=dict(facecolor='white', alpha=0.7, boxstyle='round,pad=0.2'))

        metric_idx += 1

    for i in range(metric_idx, len(axs)):
        axs[i].axis('off')

    plt.tight_layout(pad=0.5)
    return fig
def annotate_frame(frame, facial_data, metrics_dict=None, enhanced_state=None):
    """
    Add facial landmark annotations and metrics to a frame
    """
    if frame is None:
        return None

    annotated = frame.copy()

    # If we have facial data, draw the landmarks
    if facial_data and "landmarks" in facial_data:
        landmarks = facial_data["landmarks"]
        rect = facial_data.get("rect")

        # Draw face rectangle if available
        if rect:
            x1, y1, x2, y2 = rect.left(), rect.top(), rect.right(), rect.bottom()
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Draw the 68 facial landmarks
        for i in range(68):
            x, y = landmarks.part(i).x, landmarks.part(i).y
            cv2.circle(annotated, (x, y), 2, (0, 0, 255), -1)

        # Draw connecting lines for different facial features
        # Eyes
        for eye_points in [(36, 41), (42, 47)]:  # Left eye, Right eye
            for i in range(eye_points[0], eye_points[1]):
                pt1 = (landmarks.part(i).x, landmarks.part(i).y)
                pt2 = (landmarks.part(i + 1).x, landmarks.part(i + 1).y)
                cv2.line(annotated, pt1, pt2, (0, 255, 255), 1)
            # Connect last point to first
            pt1 = (landmarks.part(eye_points[1]).x, landmarks.part(eye_points[1]).y)
            pt2 = (landmarks.part(eye_points[0]).x, landmarks.part(eye_points[0]).y)
            cv2.line(annotated, pt1, pt2, (0, 255, 255), 1)

        # Eyebrows
        for brow_points in [(17, 21), (22, 26)]:  # Left eyebrow, Right eyebrow
            for i in range(brow_points[0], brow_points[1]):
                pt1 = (landmarks.part(i).x, landmarks.part(i).y)
                pt2 = (landmarks.part(i + 1).x, landmarks.part(i + 1).y)
                cv2.line(annotated, pt1, pt2, (255, 255, 0), 1)

        # Nose
        for i in range(27, 35):
            pt1 = (landmarks.part(i).x, landmarks.part(i).y)
            pt2 = (landmarks.part(i + 1).x, landmarks.part(i + 1).y)
            cv2.line(annotated, pt1, pt2, (255, 0, 255), 1)

        # Mouth outer
        for i in range(48, 59):
            pt1 = (landmarks.part(i).x, landmarks.part(i).y)
            pt2 = (landmarks.part(i + 1).x, landmarks.part(i + 1).y)
            cv2.line(annotated, pt1, pt2, (0, 255, 0), 1)
        # Connect last point to first for mouth
        pt1 = (landmarks.part(59).x, landmarks.part(59).y)
        pt2 = (landmarks.part(48).x, landmarks.part(48).y)
        cv2.line(annotated, pt1, pt2, (0, 255, 0), 1)

        # Mouth inner
        for i in range(60, 67):
            pt1 = (landmarks.part(i).x, landmarks.part(i).y)
            pt2 = (landmarks.part(i + 1).x, landmarks.part(i + 1).y)
            cv2.line(annotated, pt1, pt2, (255, 0, 0), 1)
        # Connect last point to first for inner mouth
        pt1 = (landmarks.part(67).x, landmarks.part(67).y)
        pt2 = (landmarks.part(60).x, landmarks.part(60).y)
        cv2.line(annotated, pt1, pt2, (255, 0, 0), 1)

    # Add metrics summary if available
    if metrics_dict:
        # Format for display
        h, w = annotated.shape[:2]
        y_pos = 30  # Starting Y position

        # Add user state if available
        if enhanced_state:
            # Draw background for text
            text_size = cv2.getTextSize(enhanced_state, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]
            cv2.rectangle(annotated, (10, y_pos - 20), (10 + text_size[0], y_pos + 5), (0, 0, 0), -1)
            # Draw text
            cv2.putText(annotated, enhanced_state, (10, y_pos),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
            y_pos += 30

        # Show the top 3 metrics (filtered against the global metric names)
        top_metrics = sorted([(k, v) for k, v in metrics_dict.items() if k in metrics],
                             key=lambda x: x[1], reverse=True)[:3]

        for name, value in top_metrics:
            metric_text = f"{name.replace('_', ' ').title()}: {value:.2f}"
            text_size = cv2.getTextSize(metric_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
            cv2.rectangle(annotated, (10, y_pos - 15), (10 + text_size[0], y_pos + 5), (0, 0, 0), -1)
            cv2.putText(annotated, metric_text, (10, y_pos),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            y_pos += 25

    return annotated
# --- API 1: Video File Processing ---

def process_video_file(
    video_file: Union[str, np.ndarray],
    ad_description: str = "",
    ad_detail: str = "",
    ad_type: str = "Video",
    sampling_rate: int = 5,  # Process every Nth frame
    save_processed_video: bool = True,
    show_progress: bool = True
) -> Tuple[str, str, pd.DataFrame, List[np.ndarray]]:
    """
    Process a video file and analyze facial expressions frame by frame

    Args:
        video_file: Path to video file or video array
        ad_description: Description of the ad being watched
        ad_detail: Detail focus of the ad
        ad_type: Type of ad (Video, Image, Audio, Text, Funny, etc.)
        sampling_rate: Process every Nth frame
        save_processed_video: Whether to save the processed video with annotations
        show_progress: Whether to show processing progress

    Returns:
        Tuple of (csv_path, processed_video_path, metrics_dataframe, processed_frames_list)
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_path = CSV_FILENAME_TEMPLATE.format(timestamp=timestamp)
    video_path = VIDEO_FILENAME_TEMPLATE.format(timestamp=timestamp) if save_processed_video else None

    # Setup ad context
    gemini_result = call_gemini_api_for_ad(ad_description, ad_detail, ad_type)
    ad_context = {
        "ad_description": ad_description,
        "ad_detail": ad_detail,
        "ad_type": ad_type,
        "gemini_ad_analysis": gemini_result
    }

    # Initialize capture
    if isinstance(video_file, str):
        cap = cv2.VideoCapture(video_file)
    else:
        # Create a temporary file for the video array
        temp_dir = tempfile.mkdtemp()
        temp_path = os.path.join(temp_dir, "temp_video.mp4")

        # Convert video array to file
        if isinstance(video_file, np.ndarray) and len(video_file.shape) == 4:  # Multiple frames
            h, w = video_file[0].shape[:2]
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            temp_writer = cv2.VideoWriter(temp_path, fourcc, 30, (w, h))
            for frame in video_file:
                temp_writer.write(frame)
            temp_writer.release()
            cap = cv2.VideoCapture(temp_path)
        elif isinstance(video_file, np.ndarray) and len(video_file.shape) == 3:  # Single frame
            # For a single frame, just process it directly
            metrics_data = []
            processed_frames = []

            # Process the single frame
            facial_data = extract_face_landmarks_dlib(video_file)
            deepface_data = analyze_face_with_deepface(video_file)

            if facial_data:
                calculated_metrics = calculate_metrics_enhanced(facial_data, deepface_data, ad_context)
                user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, ad_context)

                # Create a row for the dataframe
                row = {
                    'timestamp': 0.0,
                    'frame_number': 0,
                    **calculated_metrics,
                    **ad_context,
                    'user_state': user_state,
                    'enhanced_user_state': enhanced_state
                }
                metrics_data.append(row)

                # Annotate the frame
                annotated_frame = annotate_frame(video_file, facial_data, calculated_metrics, enhanced_state)
                processed_frames.append(annotated_frame)

                # Save processed image
                if save_processed_video:
                    cv2.imwrite(video_path.replace('.mp4', '.jpg'), annotated_frame)

            # Create DataFrame and save to CSV
            metrics_df = pd.DataFrame(metrics_data)
            if not metrics_df.empty:
                metrics_df.to_csv(csv_path, index=False)

            return csv_path, video_path.replace('.mp4', '.jpg') if save_processed_video else None, metrics_df, processed_frames
        else:
            print("Error: Invalid video input format")
            return None, None, None, []

    if not cap.isOpened():
        print("Error: Could not open video.")
        return None, None, None, []

    # Get video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Initialize video writer if saving processed video
    if save_processed_video:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_path, fourcc, fps / sampling_rate, (frame_width, frame_height))

    # Process video frames
    metrics_data = []
    processed_frames = []
    frame_count = 0

    if show_progress:
        print(f"Processing video with {total_frames} frames at {fps} FPS")
        print(f"Ad Context: {ad_description} ({ad_type})")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Only process every Nth frame (according to sampling_rate)
        if frame_count % sampling_rate == 0:
            if show_progress and frame_count % (sampling_rate * 10) == 0:
                print(f"Processing frame {frame_count}/{total_frames} ({frame_count/total_frames*100:.1f}%)")

            # Extract facial landmarks and analyze with DeepFace
            facial_data = extract_face_landmarks_dlib(frame)
            deepface_data = analyze_face_with_deepface(frame)

            # Calculate metrics if landmarks detected
            if facial_data:
                calculated_metrics = calculate_metrics_enhanced(facial_data, deepface_data, ad_context)
                user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, ad_context)

                # Create a row for the dataframe
                row = {
                    'timestamp': frame_count / fps,
                    'frame_number': frame_count,
                    **calculated_metrics,
                    **ad_context,
                    'user_state': user_state,
                    'enhanced_user_state': enhanced_state
                }
                metrics_data.append(row)

                # Annotate the frame
                annotated_frame = annotate_frame(frame, facial_data, calculated_metrics, enhanced_state)

                if save_processed_video:
                    out.write(annotated_frame)
                processed_frames.append(annotated_frame)
            else:
                # No face detected
                if save_processed_video:
                    # Add text to frame
                    no_face_frame = frame.copy()
                    cv2.putText(no_face_frame, "No face detected", (30, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    out.write(no_face_frame)
                    processed_frames.append(no_face_frame)

        frame_count += 1

    # Release resources
    cap.release()
    if save_processed_video:
        out.release()

    # Create DataFrame and save to CSV
    metrics_df = pd.DataFrame(metrics_data)
    if not metrics_df.empty:
        metrics_df.to_csv(csv_path, index=False)

    if show_progress:
        print(f"Video processing complete. Analyzed {len(metrics_data)} frames.")
        print(f"Results saved to {csv_path}")
        if save_processed_video:
            print(f"Processed video saved to {video_path}")

    # Return results
    return csv_path, video_path, metrics_df, processed_frames
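# Illustrative, non-executed sketch of driving the video-file API above from a script.
# "sample_ad.mp4" is a hypothetical local file, not something shipped with this Space.
def _example_video_api():
    csv_path, out_path, df, frames = process_video_file(
        "sample_ad.mp4",              # hypothetical input path
        ad_description="30s soda spot with upbeat music",
        ad_type="Funny",
        sampling_rate=10,             # analyze every 10th frame to keep the run fast
        save_processed_video=False,
    )
    if df is not None and not df.empty:
        print(df[["timestamp", "valence", "arousal", "engagement_level"]].head())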
# --- API 2: Webcam Processing Functions ---

def process_webcam_frame(
    frame: np.ndarray,
    ad_context: Dict[str, Any],
    metrics_data: pd.DataFrame,
    frame_count: int,
    start_time: float
) -> Tuple[np.ndarray, Dict[str, float], str, pd.DataFrame]:
    """
    Process a single webcam frame

    Args:
        frame: Input frame from webcam
        ad_context: Ad context dictionary
        metrics_data: DataFrame to accumulate metrics
        frame_count: Current frame count
        start_time: Start time of the session

    Returns:
        Tuple of (annotated_frame, metrics_dict, enhanced_state, updated_metrics_df)
    """
    if frame is None:
        return None, None, None, metrics_data

    # Extract facial landmarks and analyze with DeepFace
    facial_data = extract_face_landmarks_dlib(frame)
    deepface_data = analyze_face_with_deepface(frame)

    # Calculate metrics if landmarks detected
    if facial_data:
        calculated_metrics = calculate_metrics_enhanced(facial_data, deepface_data, ad_context)
        user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, ad_context)

        # Create a row for the dataframe
        current_time = time.time()
        row = {
            'timestamp': current_time - start_time,
            'frame_number': frame_count,
            **calculated_metrics,
            **ad_context,
            'user_state': user_state,
            'enhanced_user_state': enhanced_state
        }

        # Add row to DataFrame
        new_row_df = pd.DataFrame([row], columns=all_columns)
        metrics_data = pd.concat([metrics_data, new_row_df], ignore_index=True)

        # Annotate the frame
        annotated_frame = annotate_frame(frame, facial_data, calculated_metrics, enhanced_state)

        return annotated_frame, calculated_metrics, enhanced_state, metrics_data
    else:
        # No face detected
        no_face_frame = frame.copy()
        cv2.putText(no_face_frame, "No face detected", (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        return no_face_frame, None, "No face detected", metrics_data

def start_webcam_session(
    ad_description: str = "",
    ad_detail: str = "",
    ad_type: str = "Video",
    save_interval: int = 100,  # Save CSV every N frames
    record_video: bool = True
) -> Dict[str, Any]:
    """
    Initialize a webcam session for facial analysis

    Args:
        ad_description: Description of the ad being watched
        ad_detail: Detail focus of the ad
        ad_type: Type of ad
        save_interval: How often to save data to CSV
        record_video: Whether to record processed frames for later saving

    Returns:
        Session context dictionary
    """
    # Generate timestamp for file naming
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_path = CSV_FILENAME_TEMPLATE.format(timestamp=timestamp)
    video_path = VIDEO_FILENAME_TEMPLATE.format(timestamp=timestamp) if record_video else None

    # Setup ad context
    gemini_result = call_gemini_api_for_ad(ad_description, ad_detail, ad_type)
    ad_context = {
        "ad_description": ad_description,
        "ad_detail": ad_detail,
        "ad_type": ad_type,
        "gemini_ad_analysis": gemini_result
    }

    # Initialize session context
    session = {
        "start_time": time.time(),
        "frame_count": 0,
        "metrics_data": initial_metrics_df.copy(),
        "ad_context": ad_context,
        "csv_path": csv_path,
        "video_path": video_path,
        "save_interval": save_interval,
        "last_saved": 0,
        "record_video": record_video,
        "recorded_frames": [] if record_video else None,
        "timestamps": [] if record_video else None
    }

    return session

def update_webcam_session(
    session: Dict[str, Any],
    frame: np.ndarray
) -> Tuple[np.ndarray, Dict[str, float], str, Dict[str, Any]]:
    """
    Update webcam session with a new frame

    Args:
        session: Session context dictionary
        frame: New frame from webcam

    Returns:
        Tuple of (annotated_frame, metrics_dict, enhanced_state, updated_session)
    """
    # Process the frame
    annotated_frame, metrics, enhanced_state, updated_df = process_webcam_frame(
        frame,
        session["ad_context"],
        session["metrics_data"],
        session["frame_count"],
        session["start_time"]
    )

    # Update session
    session["frame_count"] += 1
    session["metrics_data"] = updated_df

    # Record frame if enabled
    if session["record_video"] and annotated_frame is not None:
        session["recorded_frames"].append(annotated_frame)
        session["timestamps"].append(time.time() - session["start_time"])

    # Save CSV periodically
    if session["frame_count"] - session["last_saved"] >= session["save_interval"]:
        if not updated_df.empty:
            updated_df.to_csv(session["csv_path"], index=False)
        session["last_saved"] = session["frame_count"]

    return annotated_frame, metrics, enhanced_state, session

def end_webcam_session(session: Dict[str, Any]) -> Tuple[str, str]:
    """
    End a webcam session and save final results

    Args:
        session: Session context dictionary

    Returns:
        Tuple of (csv_path, video_path)
    """
    # Save final metrics to CSV
    if not session["metrics_data"].empty:
        session["metrics_data"].to_csv(session["csv_path"], index=False)

    # Save recorded video if available
    video_path = None
    if session["record_video"] and session["recorded_frames"]:
        try:
            frames = session["recorded_frames"]
            if frames:
                # Get frame dimensions
                height, width = frames[0].shape[:2]

                # Calculate FPS based on actual timestamps
                if len(session["timestamps"]) > 1:
                    # Calculate average time between frames
                    time_diffs = np.diff(session["timestamps"])
                    avg_frame_time = np.mean(time_diffs)
                    fps = 1.0 / avg_frame_time if avg_frame_time > 0 else 15.0
                else:
                    fps = 15.0  # Default FPS

                # Create video writer
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                video_path = session["video_path"]
                out = cv2.VideoWriter(video_path, fourcc, fps, (width, height))

                # Write frames
                for frame in frames:
                    out.write(frame)

                out.release()
                print(f"Recorded video saved to {video_path}")
            else:
                print("No frames recorded")
        except Exception as e:
            print(f"Error saving video: {e}")

    print(f"Session ended. Data saved to {session['csv_path']}")
    return session["csv_path"], video_path
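# Sketch of the webcam session lifecycle above, fed from OpenCV instead of Gradio
# (assumes a local camera at index 0; the frame count and ad text are arbitrary).
def _example_webcam_session():
    session = start_webcam_session(ad_description="Demo ad", ad_type="Video", record_video=False)
    cap = cv2.VideoCapture(0)
    for _ in range(30):  # a short 30-frame run
        ok, frame = cap.read()
        if not ok:
            break
        _, metrics_now, state, session = update_webcam_session(session, frame)
        if metrics_now:
            print(f"{state}: engagement={metrics_now['engagement_level']:.2f}")
    cap.release()
    end_webcam_session(session)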
# --- Create Gradio Interface ---

def create_api_interface():
    with gr.Blocks(title="Enhanced Facial Analysis APIs") as iface:
        gr.Markdown("""
        # Enhanced Facial Analysis APIs

        This interface provides two API endpoints:

        1. **Video File API**: Upload and analyze pre-recorded videos
        2. **Webcam API**: Analyze a live webcam feed in real time

        Both APIs use dlib for facial landmark detection, DeepFace for emotion analysis,
        and Google's Gemini API for enhanced interpretations.
        """)

        with gr.Tab("Video File API"):
            with gr.Row():
                with gr.Column(scale=1):
                    video_input = gr.Video(label="Upload Video")
                    vid_ad_desc = gr.Textbox(label="Ad Description", placeholder="Enter a description of the advertisement being watched...")
                    vid_ad_detail = gr.Textbox(label="Ad Detail Focus", placeholder="Enter specific aspects to focus on...")
                    vid_ad_type = gr.Radio(
                        ["Video", "Image", "Audio", "Text", "Funny", "Serious", "Action", "Informative"],
                        label="Ad Type/Genre",
                        value="Video"
                    )
                    sampling_rate = gr.Slider(
                        minimum=1, maximum=30, step=1, value=5,
                        label="Sampling Rate (process every N frames)"
                    )
                    save_video = gr.Checkbox(label="Save Processed Video", value=True)
                    process_btn = gr.Button("Process Video", variant="primary")

                with gr.Column(scale=2):
                    output_text = gr.Textbox(label="Processing Results", lines=3)
                    with gr.Row():
                        with gr.Column():
                            output_video = gr.Video(label="Processed Video")
                        with gr.Column():
                            frame_gallery = gr.Gallery(label="Processed Frames",
                                                       show_label=True, columns=2,
                                                       height=400)

                    with gr.Row():
                        with gr.Column():
                            output_plot = gr.Plot(label="Sample Frame Metrics")
                        with gr.Column():
                            output_csv = gr.File(label="Download CSV Results")

            # Define function to handle video processing and show frames
            def handle_video_processing(video, desc, detail, ad_type, rate, save_vid):
                if video is None:
                    return "No video uploaded", None, None, [], None

                try:
                    result_text = "Starting video processing...\n"
                    # Process the video
                    csv_path, video_path, metrics_df, processed_frames = process_video_file(
                        video,
                        ad_description=desc,
                        ad_detail=detail,
                        ad_type=ad_type,
                        sampling_rate=rate,
                        save_processed_video=save_vid,
                        show_progress=True
                    )

                    if metrics_df is None or metrics_df.empty:
                        return "No facial data detected in video", None, None, [], None

                    # Generate a sample metrics visualization
                    sample_row = metrics_df.iloc[0].to_dict()
                    metrics_plot = update_metrics_visualization(sample_row)

                    # Create a gallery of processed frames
                    # Take a subset if there are too many frames (maximum ~20 for display)
                    display_frames = []
                    step = max(1, len(processed_frames) // 20)
                    for i in range(0, len(processed_frames), step):
                        if i < len(processed_frames):
                            # Convert BGR to RGB for display
                            rgb_frame = cv2.cvtColor(processed_frames[i], cv2.COLOR_BGR2RGB)
                            display_frames.append(rgb_frame)

                    # Return results summary
                    processed_count = metrics_df.shape[0]
                    total_count = len(processed_frames)
                    result_text = f"✅ Processed {processed_count} frames out of {total_count} total frames.\n"
                    result_text += f"📊 CSV saved with {len(metrics_df.columns)} metrics columns.\n"
                    if video_path:
                        result_text += f"🎬 Processed video saved to: {video_path}"

                    return result_text, video_path, metrics_plot, display_frames, csv_path
                except Exception as e:
                    return f"❌ Error processing video: {str(e)}", None, None, [], None

            process_btn.click(
                handle_video_processing,
                inputs=[video_input, vid_ad_desc, vid_ad_detail, vid_ad_type, sampling_rate, save_video],
                outputs=[output_text, output_video, output_plot, frame_gallery, output_csv]
            )

        with gr.Tab("Webcam API"):
            with gr.Row():
                with gr.Column(scale=2):
                    webcam_input = gr.Image(sources="webcam", streaming=True, label="Webcam Input", type="numpy")

                    with gr.Row():
                        with gr.Column():
                            web_ad_desc = gr.Textbox(label="Ad Description", placeholder="Enter a description of the advertisement being watched...")
                            web_ad_detail = gr.Textbox(label="Ad Detail Focus", placeholder="Enter specific aspects to focus on...")
                            web_ad_type = gr.Radio(
                                ["Video", "Image", "Audio", "Text", "Funny", "Serious", "Action", "Informative"],
                                label="Ad Type/Genre",
                                value="Video"
                            )
                        with gr.Column():
                            record_video_chk = gr.Checkbox(label="Record Video", value=True)
                            start_session_btn = gr.Button("Start Session", variant="primary")
                            end_session_btn = gr.Button("End Session", variant="stop")
                            session_status = gr.Textbox(label="Session Status", placeholder="Session not started...")

                with gr.Column(scale=2):
                    processed_output = gr.Image(label="Processed Feed", type="numpy", height=360)

                    with gr.Row():
                        with gr.Column():
                            metrics_plot = gr.Plot(label="Current Metrics", height=300)
                        with gr.Column():
                            enhanced_state_txt = gr.Textbox(label="Enhanced State Analysis", lines=3)

                    with gr.Row():
                        download_csv = gr.File(label="Download Session Data")
                        download_video = gr.Video(label="Recorded Session")

            # Session state
            session_data = gr.State(value=None)

            # Define session handlers
            def start_session(desc, detail, ad_type, record_video):
                session = start_webcam_session(
                    ad_description=desc,
                    ad_detail=detail,
                    ad_type=ad_type,
                    record_video=record_video
                )
                return (
                    session,
                    f"Session started at {datetime.datetime.now().strftime('%H:%M:%S')}.\n"
                    f"Ad context: {desc} ({ad_type}).\n"
                    f"Data will be saved to {session['csv_path']}"
                )

            def process_frame(frame, session):
                if session is None:
                    return frame, None, "No active session. Click 'Start Session' to begin.", session

                # Process the frame
                annotated_frame, metrics, enhanced_state, updated_session = update_webcam_session(session, frame)

                # Update the metrics plot if metrics available
                if metrics:
                    metrics_plot = update_metrics_visualization(metrics)
                    return annotated_frame, metrics_plot, enhanced_state, updated_session
                else:
                    # Return the annotated frame (likely with "No face detected")
                    return annotated_frame, None, enhanced_state or "No metrics available", updated_session

            def end_session(session):
                if session is None:
                    return "No active session", None, None

                csv_path, video_path = end_webcam_session(session)
                end_time = datetime.datetime.now().strftime('%H:%M:%S')
                result = f"Session ended at {end_time}.\n"

                if csv_path:
                    result += f"CSV data saved to: {csv_path}\n"
                if video_path:
                    result += f"Video saved to: {video_path}"

                return result, csv_path, video_path

            start_session_btn.click(
                start_session,
                inputs=[web_ad_desc, web_ad_detail, web_ad_type, record_video_chk],
                outputs=[session_data, session_status]
            )

            webcam_input.stream(
                process_frame,
                inputs=[webcam_input, session_data],
                outputs=[processed_output, metrics_plot, enhanced_state_txt, session_data]
            )

            end_session_btn.click(
                end_session,
                inputs=[session_data],
                outputs=[session_status, download_csv, download_video]
            )

    return iface

# Entry point
if __name__ == "__main__":
    print("Starting Enhanced Facial Analysis API server...")
    print(f"Gemini API {'enabled' if GEMINI_ENABLED else 'disabled (using simulation)'}")
    print("Facial analysis using dlib and DeepFace")
    iface = create_api_interface()
    iface.launch(debug=True)