import csv
import os
import threading
import time
from collections import deque

import cv2
import librosa
import mediapipe as mp
import numpy as np
import pandas as pd
import sounddevice as sd

# --- Configuration ---
SAMPLE_RATE = 16000
AUDIO_CHANNELS = 1
BUFFER_DURATION_SECONDS = 10
PROCESSING_INTERVAL_SECONDS = 4.0
CSV_FILENAME = "metrics_log.csv"

# --- Shared rolling buffers (written by the capture threads, read by the main loop) ---
ASSUMED_FPS = 30  # rough webcam frame rate used to size the frame buffers
frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * ASSUMED_FPS))
frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * ASSUMED_FPS))
# Note: each audio entry is a whole callback block, not a single sample,
# so a sample-count maxlen is a very generous upper bound here.
audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))
audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))

# --- MediaPipe FaceMesh setup ---
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
# refine_landmarks=True adds the iris landmarks needed for a pupil-size proxy.
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

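# A minimal sketch (not called by the placeholder pipeline below) of how the
# face_mesh object above would be applied per frame. The helper name
# get_face_landmarks is illustrative, not part of the original script.
def get_face_landmarks(frame_bgr):
    """Run FaceMesh on one BGR frame; return its landmark list, or None."""
    results = face_mesh.process(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    if results.multi_face_landmarks:
        return results.multi_face_landmarks[0].landmark
    return None
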
def analyze_video_window(frames, timestamps):
    print(f"Analyzing {len(frames)} frames...")
    # Placeholder analysis: random values stand in for real per-frame FaceMesh
    # processing (blink detection, iris measurement, expression proxies) until
    # those stages are implemented.
    blink_count = np.random.randint(0, 5)
    avg_pupil_proxy = np.random.rand()
    face_detected_ratio = np.random.rand()
    avg_valence_proxy = (np.random.rand() - 0.5) * 2  # in [-1, 1]
    avg_arousal_proxy_face = np.random.rand()
    return {
        "blink_count": blink_count,
        "avg_pupil_proxy": avg_pupil_proxy,
        "face_detected_ratio": face_detected_ratio,
        "avg_valence_proxy": avg_valence_proxy,
        "avg_arousal_proxy_face": avg_arousal_proxy_face
    }

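# A minimal sketch of replacing the random blink_count with eye-aspect-ratio
# (EAR) blink detection. The landmark indices are the commonly used FaceMesh
# right-eye contour points; EAR_THRESHOLD is an assumed starting value that
# needs tuning per camera and subject (landmark coordinates are normalized,
# so scale by frame size for a stricter ratio).
RIGHT_EYE = [33, 160, 158, 133, 153, 144]  # p1..p6 in the EAR formula
EAR_THRESHOLD = 0.21

def eye_aspect_ratio(landmarks, eye_indices=RIGHT_EYE):
    """EAR = (|p2-p6| + |p3-p5|) / (2|p1-p4|); it drops sharply on a blink."""
    pts = np.array([[landmarks[i].x, landmarks[i].y] for i in eye_indices])
    vertical = np.linalg.norm(pts[1] - pts[5]) + np.linalg.norm(pts[2] - pts[4])
    horizontal = np.linalg.norm(pts[0] - pts[3])
    return vertical / (2.0 * horizontal)

def count_blinks(ear_series, threshold=EAR_THRESHOLD):
    """Count closed-to-open transitions in a per-frame EAR sequence."""
    closed = [ear < threshold for ear in ear_series]
    return sum(1 for a, b in zip(closed, closed[1:]) if a and not b)
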
def analyze_audio_window(audio_chunks, timestamps):
    if not audio_chunks:
        return {"avg_rms": 0, "avg_pitch": 0}
    print(f"Analyzing {len(audio_chunks)} audio chunks...")
    # Flatten the callback blocks into one mono signal before analysis.
    full_audio = np.concatenate(audio_chunks).ravel()
    avg_rms = np.sqrt(np.mean(full_audio**2))
    # Placeholder: a random value stands in for real pitch tracking.
    avg_pitch = np.random.randint(80, 300)
    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}

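# A minimal sketch of replacing the random avg_pitch with librosa's pYIN
# tracker (librosa is imported above but otherwise unused). pyin returns NaN
# for unvoiced frames, so only voiced frames are averaged; the fmin/fmax
# bounds are assumptions covering typical speech.
def estimate_mean_pitch(audio, sr=SAMPLE_RATE):
    f0, voiced_flag, _ = librosa.pyin(
        audio.astype(np.float32),
        fmin=librosa.note_to_hz('C2'),  # ~65 Hz
        fmax=librosa.note_to_hz('C6'),  # ~1047 Hz
        sr=sr)
    voiced = f0[voiced_flag]
    return float(np.mean(voiced)) if voiced.size else 0.0
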
def calculate_final_metrics(video_features, audio_features):
    # Map the valence proxy from [-1, 1] to [0, 1].
    valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2

    # Fuse three arousal cues with heuristic weights.
    arousal_face = video_features.get("avg_arousal_proxy_face", 0)
    arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0)
    arousal_pupil = video_features.get("avg_pupil_proxy", 0.5)
    arousal = (0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil)

    # Engagement proxy: how often a face was visible in the window.
    engagement = video_features.get("face_detected_ratio", 0)

    # Stress proxy: high arousal combined with negative valence.
    stress = max(0, (1.0 - valence) * arousal)

    # Cognitive-load proxy: pupil size plus blink rate, capped at 1 blink/s.
    blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
    norm_blink_rate = min(blink_rate, 1.0)
    cog_load = (0.5 * arousal_pupil + 0.5 * norm_blink_rate)

    return {
        "Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
        "Valence": round(valence, 3),
        "Arousal": round(arousal, 3),
        "Engagement_Proxy": round(engagement, 3),
        "Stress_Proxy": round(stress, 3),
        "Cognitive_Load_Proxy": round(cog_load, 3),
        "Blink_Rate_Hz": round(blink_rate, 3),
        "Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3)
    }

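# Worked example of the fusion above (illustrative numbers): a raw valence
# proxy of 0.2 maps to valence (0.2 + 1) / 2 = 0.6; with a fused arousal of
# 0.5, stress = (1 - 0.6) * 0.5 = 0.2.
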
def log_to_csv(filename, metrics_dict):
    # Write the header only when creating the file for the first time.
    file_exists = os.path.isfile(filename)
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
        if not file_exists:
            writer.writeheader()
        writer.writerow(metrics_dict)

# --- Capture thread control flags ---
video_active = True
audio_active = True

def video_capture_thread():
    cap = cv2.VideoCapture(0)
    while video_active:
        ret, frame = cap.read()
        if ret:
            ts = time.time()
            frame_buffer.append(frame.copy())
            frame_timestamps.append(ts)
        time.sleep(1 / ASSUMED_FPS)  # pace capture at roughly the assumed FPS
    cap.release()
    print("Video thread stopped.")

def audio_capture_callback(indata, frames, time_info, status):
    """Called by sounddevice (from a separate thread) for each audio block."""
    if status:
        print(status)
    ts = time.time()
    audio_buffer.append(indata.copy())
    audio_timestamps.append(ts)

def audio_capture_thread():
    with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS,
                        callback=audio_capture_callback):
        print("Audio stream started. Press Ctrl+C to stop.")
        while audio_active:
            sd.sleep(1000)
    print("Audio thread stopped.")

if __name__ == "__main__":
    print("Starting capture threads...")
    vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
    aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
    vid_thread.start()
    aud_thread.start()

    last_process_time = time.time()

    try:
        while True:
            current_time = time.time()
            if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
                print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
                window_end_time = current_time
                window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS

                # Snapshot the deques before filtering so the capture threads
                # cannot mutate them mid-iteration.
                frame_pairs = [(f, ts) for f, ts in zip(list(frame_buffer), list(frame_timestamps))
                               if window_start_time <= ts < window_end_time]
                audio_pairs = [(a, ts) for a, ts in zip(list(audio_buffer), list(audio_timestamps))
                               if window_start_time <= ts < window_end_time]

                if not frame_pairs:
                    print("No frames in window, skipping.")
                    last_process_time = current_time
                    continue

                # Pass the matching timestamps along with the data.
                video_features = analyze_video_window(
                    [f for f, _ in frame_pairs], [ts for _, ts in frame_pairs])
                audio_features = analyze_audio_window(
                    [a for a, _ in audio_pairs], [ts for _, ts in audio_pairs])

                final_metrics = calculate_final_metrics(video_features, audio_features)
                print("Calculated Metrics:", final_metrics)
                log_to_csv(CSV_FILENAME, final_metrics)

                last_process_time = current_time

            time.sleep(0.1)

    except KeyboardInterrupt:
        print("Stopping...")
        video_active = False
        audio_active = False

        vid_thread.join(timeout=2.0)
        aud_thread.join(timeout=2.0)

        print("Done.")