import cv2
import sounddevice as sd
import mediapipe as mp
import numpy as np
import librosa
import threading
import time
import csv
import os
from collections import deque

# --- Configuration ---
SAMPLE_RATE = 16000
AUDIO_CHANNELS = 1
BUFFER_DURATION_SECONDS = 10 # Keep last 10s of data
PROCESSING_INTERVAL_SECONDS = 4.0
CSV_FILENAME = "metrics_log.csv"

# --- Buffers (shared across threads; guarded by buffer_lock below) ---
frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))  # Assuming ~30 fps
audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))  # Holds chunks, so this bound is generous
frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))
audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))  # One timestamp per chunk
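
# The capture threads and the main loop share the deques above. deque.append is
# atomic in CPython, but the paired (data, timestamp) appends are not, and
# iterating a deque while another thread appends to it can raise RuntimeError,
# so a single shared lock guards every append and snapshot below.
buffer_lock = threading.Lock()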

# --- MediaPipe Setup ---
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True, # Crucial for iris/pupil
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)
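
# A minimal sketch of the per-frame mesh call the analyze_video_window
# placeholder below would make; MediaPipe expects RGB input, while OpenCV
# delivers BGR frames.
def run_face_mesh(frame_bgr):
    results = face_mesh.process(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
        return None  # No face detected in this frame
    return results.multi_face_landmarks[0].landmark  # 478 points with refine_landmarks=True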

# --- Placeholder Functions (require detailed implementation) ---
def analyze_video_window(frames, timestamps):
    print(f"Analyzing {len(frames)} frames...")
    # TODO (one workable approach is sketched in the helpers below):
    # - Run MediaPipe Face Mesh + Iris on each frame
    # - Extract face presence, landmarks, blink status, pupil data per frame
    # - Aggregate: % face detected, avg emotion scores (if using FER), avg pupil proxy, total blinks
    # - Return aggregated features
    blink_count = np.random.randint(0, 5) # Placeholder
    avg_pupil_proxy = np.random.rand() # Placeholder
    face_detected_ratio = np.random.rand() # Placeholder
    avg_valence_proxy = (np.random.rand() - 0.5) * 2 # Placeholder [-1, 1]
    avg_arousal_proxy_face = np.random.rand() # Placeholder [0, 1]
    return {
        "blink_count": blink_count,
        "avg_pupil_proxy": avg_pupil_proxy,
        "face_detected_ratio": face_detected_ratio,
        "avg_valence_proxy": avg_valence_proxy,
        "avg_arousal_proxy_face": avg_arousal_proxy_face
    }
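
# Minimal helper sketches for the TODO above, assuming the standard MediaPipe
# Face Mesh topology: the six-point set is the commonly used right-eye EAR set,
# and 468/469 are iris points available only with refine_landmarks=True --
# verify both against your MediaPipe version. The blink threshold is an
# assumption to tune per camera and subject.
RIGHT_EYE_EAR_IDX = [33, 160, 158, 133, 153, 144]
EAR_BLINK_THRESHOLD = 0.21

def eye_aspect_ratio(landmarks, idx=RIGHT_EYE_EAR_IDX):
    # EAR = (|p2-p6| + |p3-p5|) / (2 * |p1-p4|); count a blink when EAR
    # dips below EAR_BLINK_THRESHOLD and then recovers.
    p = [np.array([landmarks[i].x, landmarks[i].y]) for i in idx]
    vertical = np.linalg.norm(p[1] - p[5]) + np.linalg.norm(p[2] - p[4])
    horizontal = 2.0 * np.linalg.norm(p[0] - p[3])
    return vertical / horizontal if horizontal > 0 else 0.0

def pupil_size_proxy(landmarks):
    # Iris radius normalized by eye width: scale-invariant, but only a proxy,
    # since true pupil dilation is not observable from the mesh itself.
    center = np.array([landmarks[468].x, landmarks[468].y])
    rim = np.array([landmarks[469].x, landmarks[469].y])
    eye_width = np.linalg.norm(np.array([landmarks[33].x, landmarks[33].y]) -
                               np.array([landmarks[133].x, landmarks[133].y]))
    return float(np.linalg.norm(rim - center) / eye_width) if eye_width > 0 else 0.0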

def analyze_audio_window(audio_chunks, timestamps):
    if not audio_chunks:
        return {"avg_rms": 0, "avg_pitch": 0} # Default
    print(f"Analyzing {len(audio_chunks)} audio chunks...")
    # TODO:
    # - Concatenate chunks carefully based on timestamps / expected samples
    # - Run librosa: calculate RMS, pitch (e.g., pyin), maybe pauses
    # - Return aggregated features
    full_audio = np.concatenate(audio_chunks)
    avg_rms = np.sqrt(np.mean(full_audio**2)) # Basic RMS
    # Pitch estimation can be computationally expensive; note that librosa.pyin
    # returns (f0, voiced_flag, voiced_prob), not (pitches, magnitudes) -- see
    # the estimate_pitch sketch below.
    # avg_pitch = estimate_pitch(full_audio)
    avg_pitch = np.random.randint(80, 300) # Placeholder
    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}


def calculate_final_metrics(video_features, audio_features):
    # TODO: Combine features into the final 0-1 metrics
    # This requires defining heuristics or a simple model based on the features
    valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2 # Normalize [-1,1] to [0,1]

    # Combine multiple arousal indicators (weights are examples)
    arousal_face = video_features.get("avg_arousal_proxy_face", 0)
    arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0) # Scale RMS
    arousal_pupil = video_features.get("avg_pupil_proxy", 0.5) # Assuming pupil proxy is 0-1
    arousal = (0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil)

    engagement = video_features.get("face_detected_ratio", 0) # Simple proxy
    # Could add logic based on blink rate deviations, gaze stability etc.

    # Stress based on neg valence, high arousal
    stress = max(0, (1.0 - valence) * arousal) # Example heuristic

    # Cog load based on blink rate, pupil dilation
    blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
    # Normalize blink rate based on expected range (e.g. 0-1 Hz)
    norm_blink_rate = min(blink_rate, 1.0)
    cog_load = (0.5 * arousal_pupil + 0.5 * norm_blink_rate) # Example heuristic

    return {
        "Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
        "Valence": round(valence, 3),
        "Arousal": round(arousal, 3),
        "Engagement_Proxy": round(engagement, 3),
        "Stress_Proxy": round(stress, 3),
        "Cognitive_Load_Proxy": round(cog_load, 3),
        "Blink_Rate_Hz": round(blink_rate, 3),
        "Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3)
        # --- Exclude Traits ---
    }

def log_to_csv(filename, metrics_dict):
    file_exists = os.path.isfile(filename)
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
        if not file_exists:
            writer.writeheader() # Write header only once
        writer.writerow(metrics_dict)

# --- Capture Threads (Simplified Example - Needs proper implementation) ---
video_active = True
audio_active = True

def video_capture_thread():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Could not open camera.")
        return
    while video_active:
        ret, frame = cap.read()
        if ret:
            ts = time.time()
            # Copy the frame so later processing sees a stable snapshot
            with buffer_lock:
                frame_buffer.append(frame.copy())
                frame_timestamps.append(ts)
        time.sleep(1/30.0)  # Limit capture rate
    cap.release()
    print("Video thread stopped.")

def audio_capture_callback(indata, frames, time_info, status):
    """This is called (from a separate thread) for each audio block."""
    if status:
        print(status)
    ts = time.time()  # Timestamp the arrival of the chunk
    # Copy: sounddevice may reuse the indata buffer after the callback returns
    with buffer_lock:
        audio_buffer.append(indata.copy())
        audio_timestamps.append(ts)

def audio_capture_thread():
    with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS, callback=audio_capture_callback):
        print("Audio stream started. Press Ctrl+C to stop.")
        while audio_active:
            sd.sleep(1000) # Keep thread alive while stream is running
    print("Audio thread stopped.")

# --- Main Processing Logic ---
if __name__ == "__main__":
    print("Starting capture threads...")
    vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
    aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
    vid_thread.start()
    aud_thread.start()

    last_process_time = time.time()

    try:
        while True:
            current_time = time.time()
            if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
                print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
                window_end_time = current_time
                window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS

                # --- Get data for the window ---
                # Snapshot the (data, timestamp) pairs under the lock, then
                # filter by timestamp range outside it.
                with buffer_lock:
                    frame_snapshot = list(zip(frame_buffer, frame_timestamps))
                    audio_snapshot = list(zip(audio_buffer, audio_timestamps))
                frames_in_window = [f for f, ts in frame_snapshot if window_start_time <= ts < window_end_time]
                audio_in_window = [a for a, ts in audio_snapshot if window_start_time <= ts < window_end_time]
                # In practice, you'd also remove processed items from the buffers

                if not frames_in_window:
                    print("No frames in window, skipping.")
                    last_process_time = current_time # Or += PROCESSING_INTERVAL_SECONDS
                    continue

                # --- Analyze ---
                video_features = analyze_video_window(frames_in_window, []) # Pass timestamps if needed
                audio_features = analyze_audio_window(audio_in_window, []) # Pass timestamps if needed

                # --- Calculate & Log ---
                final_metrics = calculate_final_metrics(video_features, audio_features)
                print("Calculated Metrics:", final_metrics)
                log_to_csv(CSV_FILENAME, final_metrics)

                last_process_time = current_time # Reset timer accurately


            time.sleep(0.1) # Prevent busy-waiting

    except KeyboardInterrupt:
        print("Stopping...")
        video_active = False
        audio_active = False
        # Wait for threads to finish
        vid_thread.join(timeout=2.0)
        # Audio thread stops once audio_active is False and sd.sleep returns
        face_mesh.close()  # Release MediaPipe resources
        print("Done.")