import cv2
import sounddevice as sd
import mediapipe as mp
import numpy as np
import pandas as pd
import librosa
import threading
import time
import csv
import os
from collections import deque
# --- Configuration ---
SAMPLE_RATE = 16000
AUDIO_CHANNELS = 1
BUFFER_DURATION_SECONDS = 10 # Keep last 10s of data
PROCESSING_INTERVAL_SECONDS = 4.0
CSV_FILENAME = "metrics_log.csv"
# --- Buffers (deque appends are atomic; see the snapshot sketch below for
#     keeping the paired data/timestamp deques consistent) ---
AUDIO_BLOCKSIZE = 1024  # Samples per audio callback block (set on the stream below)
frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))  # Assuming ~30 fps
frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))
# The audio deques hold one chunk (and one timestamp) per callback, so size
# them in chunks rather than samples
audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE / AUDIO_BLOCKSIZE) + 1)
audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE / AUDIO_BLOCKSIZE) + 1)
# --- MediaPipe Setup ---
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
face_mesh = mp_face_mesh.FaceMesh(
max_num_faces=1,
refine_landmarks=True, # Crucial for iris/pupil
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
# --- Placeholder Functions (Requires detailed implementation) ---
def analyze_video_window(frames, timestamps):
print(f"Analyzing {len(frames)} frames...")
# TODO:
# - Run MediaPipe Face Mesh + Iris on each frame
# - Extract face presence, landmarks, blink status, pupil data per frame
# - Aggregate: % face detected, avg emotion scores (if using FER), avg pupil proxy, total blinks
# - Return aggregated features
blink_count = np.random.randint(0, 5) # Placeholder
avg_pupil_proxy = np.random.rand() # Placeholder
face_detected_ratio = np.random.rand() # Placeholder
avg_valence_proxy = (np.random.rand() - 0.5) * 2 # Placeholder [-1, 1]
avg_arousal_proxy_face = np.random.rand() # Placeholder [0, 1]
return {
"blink_count": blink_count,
"avg_pupil_proxy": avg_pupil_proxy,
"face_detected_ratio": face_detected_ratio,
"avg_valence_proxy": avg_valence_proxy,
"avg_arousal_proxy_face": avg_arousal_proxy_face
}
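# A hedged sketch of the per-frame extraction described in the TODO above:
# EAR-based blink detection plus an iris-width pupil proxy. The landmark
# indices (33/160/158/133/153/144 for one eye contour; 469/471 for the iris
# edge, available with refine_landmarks=True) follow the commonly used
# Face Mesh numbering; EAR_BLINK_THRESHOLD is an untuned assumption.
EAR_BLINK_THRESHOLD = 0.21  # Assumed value; tune per camera/user

def eye_aspect_ratio(lm, idx=(33, 160, 158, 133, 153, 144)):
    p = [np.array([lm[i].x, lm[i].y]) for i in idx]
    # EAR = (|p2 - p6| + |p3 - p5|) / (2 * |p1 - p4|)
    return (np.linalg.norm(p[1] - p[5]) + np.linalg.norm(p[2] - p[4])) / \
           (2.0 * np.linalg.norm(p[0] - p[3]) + 1e-6)

def extract_frame_features(frame_bgr):
    """Run Face Mesh on one BGR frame -> (face_found, ear, iris_width)."""
    results = face_mesh.process(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
        return False, None, None
    lm = results.multi_face_landmarks[0].landmark
    iris_width = np.linalg.norm(np.array([lm[469].x, lm[469].y]) -
                                np.array([lm[471].x, lm[471].y]))
    return True, eye_aspect_ratio(lm), iris_width
# A blink can then be counted in analyze_video_window whenever EAR dips
# below EAR_BLINK_THRESHOLD and recovers above it on a later frame.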
def analyze_audio_window(audio_chunks, timestamps):
if not audio_chunks:
return {"avg_rms": 0, "avg_pitch": 0} # Default
print(f"Analyzing {len(audio_chunks)} audio chunks...")
# TODO:
# - Concatenate chunks carefully based on timestamps / expected samples
# - Run librosa: calculate RMS, pitch (e.g., pyin), maybe pauses
# - Return aggregated features
    # sounddevice delivers (frames, channels) blocks; flatten to 1-D mono
    full_audio = np.concatenate(audio_chunks).flatten()
    avg_rms = np.sqrt(np.mean(full_audio**2))  # Basic RMS
    # Pitch estimation can be computationally expensive. Note that
    # librosa.pyin returns three values (f0, voiced_flag, voiced_probs):
    # f0, _, _ = librosa.pyin(full_audio, fmin=librosa.note_to_hz('C2'),
    #                         fmax=librosa.note_to_hz('C7'), sr=SAMPLE_RATE)
    # avg_pitch = float(np.nanmean(f0)) if not np.all(np.isnan(f0)) else 0
    avg_pitch = np.random.randint(80, 300)  # Placeholder
return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}
def calculate_final_metrics(video_features, audio_features):
# TODO: Combine features into the final 0-1 metrics
# This requires defining heuristics or a simple model based on the features
valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2 # Normalize [-1,1] to [0,1]
# Combine multiple arousal indicators (weights are examples)
arousal_face = video_features.get("avg_arousal_proxy_face", 0)
arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0) # Scale RMS
arousal_pupil = video_features.get("avg_pupil_proxy", 0.5) # Assuming pupil proxy is 0-1
arousal = (0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil)
engagement = video_features.get("face_detected_ratio", 0) # Simple proxy
# Could add logic based on blink rate deviations, gaze stability etc.
# Stress based on neg valence, high arousal
stress = max(0, (1.0 - valence) * arousal) # Example heuristic
# Cog load based on blink rate, pupil dilation
blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
# Normalize blink rate based on expected range (e.g. 0-1 Hz)
norm_blink_rate = min(blink_rate, 1.0)
cog_load = (0.5 * arousal_pupil + 0.5 * norm_blink_rate) # Example heuristic
return {
"Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
"Valence": round(valence, 3),
"Arousal": round(arousal, 3),
"Engagement_Proxy": round(engagement, 3),
"Stress_Proxy": round(stress, 3),
"Cognitive_Load_Proxy": round(cog_load, 3),
"Blink_Rate_Hz": round(blink_rate, 3),
"Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3)
# --- Exclude Traits ---
}
def log_to_csv(filename, metrics_dict):
file_exists = os.path.isfile(filename)
with open(filename, 'a', newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
if not file_exists:
writer.writeheader() # Write header only once
writer.writerow(metrics_dict)
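# pandas is imported above but unused in this excerpt; one plausible use is
# loading the log for offline analysis (a sketch, not part of the pipeline):
# df = pd.read_csv(CSV_FILENAME, parse_dates=["Timestamp"])
# print(df[["Valence", "Arousal", "Stress_Proxy"]].describe())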
# --- Capture Threads (Simplified Example - Needs proper implementation) ---
video_active = True
audio_active = True
def video_capture_thread():
cap = cv2.VideoCapture(0)
while video_active:
ret, frame = cap.read()
if ret:
ts = time.time()
# Make copies to avoid issues if buffer processes frame later
frame_buffer.append(frame.copy())
frame_timestamps.append(ts)
time.sleep(1/30.0) # Limit capture rate
cap.release()
print("Video thread stopped.")
def audio_capture_callback(indata, frames, time_info, status):
"""This is called (from a separate thread) for each audio block."""
if status:
print(status)
ts = time.time() # Timestamp the arrival of the chunk
# Make copies to avoid issues if buffer processes chunk later
audio_buffer.append(indata.copy())
audio_timestamps.append(ts) # Add timestamp for the chunk
def audio_capture_thread():
    with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS,
                        blocksize=AUDIO_BLOCKSIZE, callback=audio_capture_callback):
print("Audio stream started. Press Ctrl+C to stop.")
while audio_active:
sd.sleep(1000) # Keep thread alive while stream is running
print("Audio thread stopped.")
# --- Main Processing Logic ---
if __name__ == "__main__":
print("Starting capture threads...")
vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
vid_thread.start()
aud_thread.start()
last_process_time = time.time()
try:
while True:
current_time = time.time()
if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
window_end_time = current_time
window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS
                # --- Get data for the window ---
                # Selecting items in the timestamp range from two independent
                # snapshots can drift; the snapshot_buffers() sketch above
                # shows one way to copy the paired deques atomically.
                # Simple non-thread-safe example:
frames_in_window = [f for f, ts in zip(list(frame_buffer), list(frame_timestamps)) if window_start_time <= ts < window_end_time]
audio_in_window = [a for a, ts in zip(list(audio_buffer), list(audio_timestamps)) if window_start_time <= ts < window_end_time]
# In practice, you'd remove processed items from the buffer
if not frames_in_window:
print("No frames in window, skipping.")
last_process_time = current_time # Or += PROCESSING_INTERVAL_SECONDS
continue
# --- Analyze ---
video_features = analyze_video_window(frames_in_window, []) # Pass timestamps if needed
audio_features = analyze_audio_window(audio_in_window, []) # Pass timestamps if needed
# --- Calculate & Log ---
final_metrics = calculate_final_metrics(video_features, audio_features)
print("Calculated Metrics:", final_metrics)
log_to_csv(CSV_FILENAME, final_metrics)
last_process_time = current_time # Reset timer accurately
time.sleep(0.1) # Prevent busy-waiting
except KeyboardInterrupt:
print("Stopping...")
video_active = False
audio_active = False
        # Wait for both threads to finish
        vid_thread.join(timeout=2.0)
        aud_thread.join(timeout=2.0)  # Returns once sd.sleep() wakes and the stream closes
print("Done.") |