Update app.py
app.py CHANGED
@@ -1,94 +1,201 @@
 import cv2
 import time
 import csv
-
-import numpy as np
-import sounddevice as sd
-import soundfile as sf
-from deepface import DeepFace
-import tempfile
-import os
-from scipy.io.wavfile import write as write_wav
-
 
-#
 SAMPLE_RATE = 16000
 cap = cv2.VideoCapture(0)
-
-data = []
-
-# Start audio recording in parallel
-audio_file_path = os.path.join(tempfile.gettempdir(), "temp_audio.wav")
-record_audio(audio_file_path)
-
-for i in range(DURATION):
     ret, frame = cap.read()
-    if
-        valence, arousal, dominance, stress_index, engagement_level = map_emotions_to_metrics(emotion)
-
-        data.append([
-            round(time.time() - start_time, 2),
-            round(valence, 3),
-            round(arousal, 3),
-            round(dominance, 3),
-            round(stress_index, 3),
-            round(engagement_level, 3)
-        ])
-
-    except Exception as e:
-        print("Error analyzing frame:", e)
-    time.sleep(1)
-
 cap.release()
-
-def
-    title="📊 Complex Campaign Emotion Response Engine",
-    description="This demo captures webcam and microphone input for 30 seconds. It analyzes facial expressions using DeepFace and records audio. The output is a downloadable CSV of emotional metrics over time."
-)
-
 if __name__ == "__main__":
-
 import cv2
+import sounddevice as sd
+import mediapipe as mp
+import numpy as np
+import pandas as pd
+import librosa
+import threading
 import time
 import csv
+from collections import deque
 
+# --- Configuration ---
 SAMPLE_RATE = 16000
+AUDIO_CHANNELS = 1
+BUFFER_DURATION_SECONDS = 10  # Keep last 10s of data
+PROCESSING_INTERVAL_SECONDS = 4.0
+CSV_FILENAME = "metrics_log.csv"
+
+# --- Buffers (use thread-safe versions if needed) ---
+frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))  # Assuming ~30fps
+audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))
+frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))
+audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))  # Timestamps per chunk
+
+# --- MediaPipe Setup ---
+mp_face_mesh = mp.solutions.face_mesh
+mp_drawing = mp.solutions.drawing_utils
+drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
+face_mesh = mp_face_mesh.FaceMesh(
+    max_num_faces=1,
+    refine_landmarks=True,  # Crucial for iris/pupil
+    min_detection_confidence=0.5,
+    min_tracking_confidence=0.5)
+
+# --- Placeholder Functions (Requires detailed implementation) ---
+def analyze_video_window(frames, timestamps):
+    print(f"Analyzing {len(frames)} frames...")
+    # TODO:
+    # - Run MediaPipe Face Mesh + Iris on each frame
+    # - Extract face presence, landmarks, blink status, pupil data per frame
+    # - Aggregate: % face detected, avg emotion scores (if using FER), avg pupil proxy, total blinks
+    # - Return aggregated features
+    blink_count = np.random.randint(0, 5)  # Placeholder
+    avg_pupil_proxy = np.random.rand()  # Placeholder
+    face_detected_ratio = np.random.rand()  # Placeholder
+    avg_valence_proxy = (np.random.rand() - 0.5) * 2  # Placeholder [-1, 1]
+    avg_arousal_proxy_face = np.random.rand()  # Placeholder [0, 1]
+    return {
+        "blink_count": blink_count,
+        "avg_pupil_proxy": avg_pupil_proxy,
+        "face_detected_ratio": face_detected_ratio,
+        "avg_valence_proxy": avg_valence_proxy,
+        "avg_arousal_proxy_face": avg_arousal_proxy_face
+    }
+
+def analyze_audio_window(audio_chunks, timestamps):
+    if not audio_chunks:
+        return {"avg_rms": 0, "avg_pitch": 0}  # Default
+    print(f"Analyzing {len(audio_chunks)} audio chunks...")
+    # TODO:
+    # - Concatenate chunks carefully based on timestamps / expected samples
+    # - Run librosa: calculate RMS, pitch (e.g., pyin), maybe pauses
+    # - Return aggregated features
+    full_audio = np.concatenate(audio_chunks)
+    avg_rms = np.sqrt(np.mean(full_audio**2))  # Basic RMS
+    # Pitch estimation can be computationally expensive
+    # pitches, magnitudes = librosa.pyin(full_audio, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=SAMPLE_RATE)
+    # avg_pitch = np.nanmean(pitches) if pitches is not None and len(pitches) > 0 else 0
+    avg_pitch = np.random.randint(80, 300)  # Placeholder
+    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}
+
+
+def calculate_final_metrics(video_features, audio_features):
+    # TODO: Combine features into the final 0-1 metrics
+    # This requires defining heuristics or a simple model based on the features
+    valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2  # Normalize [-1, 1] to [0, 1]
+
+    # Combine multiple arousal indicators (weights are examples)
+    arousal_face = video_features.get("avg_arousal_proxy_face", 0)
+    arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0)  # Scale RMS
+    arousal_pupil = video_features.get("avg_pupil_proxy", 0.5)  # Assuming pupil proxy is 0-1
+    arousal = (0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil)
+
+    engagement = video_features.get("face_detected_ratio", 0)  # Simple proxy
+    # Could add logic based on blink rate deviations, gaze stability etc.
+
+    # Stress based on neg valence, high arousal
+    stress = max(0, (1.0 - valence) * arousal)  # Example heuristic
+
+    # Cog load based on blink rate, pupil dilation
+    blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
+    # Normalize blink rate based on expected range (e.g. 0-1 Hz)
+    norm_blink_rate = min(blink_rate, 1.0)
+    cog_load = (0.5 * arousal_pupil + 0.5 * norm_blink_rate)  # Example heuristic
+
+    return {
+        "Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
+        "Valence": round(valence, 3),
+        "Arousal": round(arousal, 3),
+        "Engagement_Proxy": round(engagement, 3),
+        "Stress_Proxy": round(stress, 3),
+        "Cognitive_Load_Proxy": round(cog_load, 3),
+        "Blink_Rate_Hz": round(blink_rate, 3),
+        "Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3)
+        # --- Exclude Traits ---
+    }
+
+def log_to_csv(filename, metrics_dict):
+    file_exists = os.path.isfile(filename)
+    with open(filename, 'a', newline='') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
+        if not file_exists:
+            writer.writeheader()  # Write header only once
+        writer.writerow(metrics_dict)
+
+# --- Capture Threads (Simplified Example - Needs proper implementation) ---
+video_active = True
+audio_active = True
+
+def video_capture_thread():
     cap = cv2.VideoCapture(0)
+    while video_active:
         ret, frame = cap.read()
+        if ret:
+            ts = time.time()
+            # Make copies to avoid issues if buffer processes frame later
+            frame_buffer.append(frame.copy())
+            frame_timestamps.append(ts)
+        time.sleep(1/30.0)  # Limit capture rate
     cap.release()
+    print("Video thread stopped.")
+
+def audio_capture_callback(indata, frames, time_info, status):
+    """This is called (from a separate thread) for each audio block."""
+    if status:
+        print(status)
+    ts = time.time()  # Timestamp the arrival of the chunk
+    # Make copies to avoid issues if buffer processes chunk later
+    audio_buffer.append(indata.copy())
+    audio_timestamps.append(ts)  # Add timestamp for the chunk
+
+def audio_capture_thread():
+    with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS, callback=audio_capture_callback):
+        print("Audio stream started. Press Ctrl+C to stop.")
+        while audio_active:
+            sd.sleep(1000)  # Keep thread alive while stream is running
+    print("Audio thread stopped.")
+
+# --- Main Processing Logic ---
+import os
 if __name__ == "__main__":
+    print("Starting capture threads...")
+    vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
+    aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
+    vid_thread.start()
+    aud_thread.start()
+
+    last_process_time = time.time()
+
+    try:
+        while True:
+            current_time = time.time()
+            if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
+                print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
+                window_end_time = current_time
+                window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS
+
+                # --- Get data for the window (Needs thread safety - locks!) ---
+                # This part is tricky: efficiently select items in the timestamp range
+                # Simple non-thread-safe example:
+                frames_in_window = [f for f, ts in zip(list(frame_buffer), list(frame_timestamps)) if window_start_time <= ts < window_end_time]
+                audio_in_window = [a for a, ts in zip(list(audio_buffer), list(audio_timestamps)) if window_start_time <= ts < window_end_time]
+                # In practice, you'd remove processed items from the buffer
+
+                if not frames_in_window:
+                    print("No frames in window, skipping.")
+                    last_process_time = current_time  # Or += PROCESSING_INTERVAL_SECONDS
+                    continue
+
+                # --- Analyze ---
+                video_features = analyze_video_window(frames_in_window, [])  # Pass timestamps if needed
+                audio_features = analyze_audio_window(audio_in_window, [])  # Pass timestamps if needed
+
+                # --- Calculate & Log ---
+                final_metrics = calculate_final_metrics(video_features, audio_features)
+                print("Calculated Metrics:", final_metrics)
+                log_to_csv(CSV_FILENAME, final_metrics)
+
+                last_process_time = current_time  # Reset timer accurately
+
+            time.sleep(0.1)  # Prevent busy-waiting
+
+    except KeyboardInterrupt:
+        print("Stopping...")
+        video_active = False
+        audio_active = False
+        # Wait for threads to finish
+        vid_thread.join(timeout=2.0)
+        # Audio thread stops when sd.sleep ends or stream closes
+        print("Done.")
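
The TODO block in `analyze_video_window` leaves the per-frame landmark work open. Below is a minimal sketch of one way it could be filled in, using the Eye Aspect Ratio (EAR) idea for blink counting and iris size relative to the inter-ocular distance as a crude pupil proxy. It assumes the `face_mesh` object created above with `refine_landmarks=True`; the landmark indices, the 0.21 EAR threshold, and the helper names (`_px`, `landmark_pass`) are illustrative choices, not part of this commit.

```python
import cv2
import numpy as np

# Commonly used Face Mesh indices (assumed, not from the commit):
EYE_CORNERS = (33, 133)            # Horizontal corners of one eye
EYE_LIDS = (159, 145)              # Upper / lower lid of the same eye
IRIS = (468, 469, 470, 471, 472)   # Iris center + 4 rim points (refine_landmarks=True)
OUTER_CORNERS = (33, 263)          # Outer corners of both eyes, used for scale
EAR_BLINK_THRESHOLD = 0.21         # Assumed threshold; tune per camera/user

def _px(lm, i, w, h):
    """Landmark i in pixel coordinates."""
    return np.array([lm[i].x * w, lm[i].y * h])

def eye_aspect_ratio(lm, w, h):
    """Rough EAR: eyelid gap over eye width (single vertical pair)."""
    gap = np.linalg.norm(_px(lm, EYE_LIDS[0], w, h) - _px(lm, EYE_LIDS[1], w, h))
    width = np.linalg.norm(_px(lm, EYE_CORNERS[0], w, h) - _px(lm, EYE_CORNERS[1], w, h)) + 1e-6
    return gap / width

def iris_size_proxy(lm, w, h):
    """Iris radius over inter-ocular distance: a crude stand-in for pupil size."""
    center = _px(lm, IRIS[0], w, h)
    radius = np.mean([np.linalg.norm(_px(lm, i, w, h) - center) for i in IRIS[1:]])
    scale = np.linalg.norm(_px(lm, OUTER_CORNERS[0], w, h) - _px(lm, OUTER_CORNERS[1], w, h)) + 1e-6
    return float(radius / scale)

def landmark_pass(frames, face_mesh):
    """Aggregate blink count, face-detected ratio and pupil proxy over a window."""
    blink_count, face_frames, pupil_vals = 0, 0, []
    eye_closed = False
    for frame in frames:
        h, w = frame.shape[:2]
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            eye_closed = False
            continue
        face_frames += 1
        lm = results.multi_face_landmarks[0].landmark
        if eye_aspect_ratio(lm, w, h) < EAR_BLINK_THRESHOLD:
            if not eye_closed:          # Falling edge counts as one blink
                blink_count += 1
            eye_closed = True
        else:
            eye_closed = False
        pupil_vals.append(iris_size_proxy(lm, w, h))
    return {
        "blink_count": blink_count,
        "face_detected_ratio": face_frames / max(len(frames), 1),
        "avg_pupil_proxy": float(np.mean(pupil_vals)) if pupil_vals else 0.0,
    }
```

A fuller implementation would likely average both eyes and smooth the EAR signal over a few frames before thresholding.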
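For the `analyze_audio_window` TODO, the librosa side could look roughly like the sketch below. Note that `librosa.pyin` returns `(f0, voiced_flag, voiced_prob)` rather than the `(pitches, magnitudes)` pair suggested by the commented-out line, and `f0` is NaN on unvoiced frames. The function name and the `fmin`/`fmax` range here are assumptions.

```python
import numpy as np
import librosa

SAMPLE_RATE = 16000

def audio_features(audio_chunks, sr=SAMPLE_RATE):
    """Hypothetical filling-in of analyze_audio_window."""
    if not audio_chunks:
        return {"avg_rms": 0.0, "avg_pitch": 0.0}
    # sounddevice delivers (n_samples, 1) blocks with AUDIO_CHANNELS = 1; flatten to 1-D
    y = np.concatenate([np.asarray(c).reshape(-1) for c in audio_chunks]).astype(np.float32)

    # Frame-wise RMS energy, averaged over the window
    avg_rms = float(np.mean(librosa.feature.rms(y=y)))

    # pyin returns (f0, voiced_flag, voiced_prob); f0 is NaN on unvoiced frames
    f0, voiced_flag, voiced_prob = librosa.pyin(
        y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr)
    voiced = f0[~np.isnan(f0)]
    avg_pitch = float(np.mean(voiced)) if voiced.size else 0.0

    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}
```

Since pyin is relatively slow, running it only on every Nth window is one way to keep the 4-second processing interval comfortable.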
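The main loop's comment flags that the window selection "Needs thread safety - locks!". One possible pattern is a single `threading.Lock` per buffer/timestamp pair, with the processing loop draining items up to the window end while holding the lock; the names `buffer_lock`, `append_frame`, and `drain_window` below are illustrative additions, not part of the commit.

```python
import threading
from collections import deque

buffer_lock = threading.Lock()
frame_buffer = deque()
frame_timestamps = deque()

def append_frame(frame, ts):
    """Called from the capture thread for every grabbed frame."""
    with buffer_lock:
        frame_buffer.append(frame)
        frame_timestamps.append(ts)

def drain_window(start_ts, end_ts):
    """Called from the processing loop: pop everything before end_ts and
    return only the items whose timestamp falls in [start_ts, end_ts)."""
    window = []
    with buffer_lock:
        while frame_timestamps and frame_timestamps[0] < end_ts:
            ts = frame_timestamps.popleft()
            frame = frame_buffer.popleft()
            if ts >= start_ts:
                window.append((ts, frame))
    return window
```

Draining (rather than copying) also covers the "remove processed items from the buffer" note in the loop and keeps memory bounded without relying solely on the deque maxlen.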