import os
import cv2
import sounddevice as sd
import mediapipe as mp
import numpy as np
import pandas as pd
import librosa
import threading
import time
import csv
from collections import deque

# --- Configuration ---
SAMPLE_RATE = 16000
AUDIO_CHANNELS = 1
AUDIO_BLOCK_SIZE = 1024            # Samples per audio callback block (see sd.InputStream below)
BUFFER_DURATION_SECONDS = 10       # Keep last 10s of data
PROCESSING_INTERVAL_SECONDS = 4.0
CSV_FILENAME = "metrics_log.csv"

# --- Buffers (use thread-safe versions if needed) ---
frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))      # Assuming ~30fps
frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))
# Audio buffers hold one entry per callback block, not one per sample
audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE / AUDIO_BLOCK_SIZE))
audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE / AUDIO_BLOCK_SIZE))  # Timestamps per chunk

# --- MediaPipe Setup ---
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,  # Crucial for iris/pupil
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

# --- Placeholder Functions (Requires detailed implementation) ---

def analyze_video_window(frames, timestamps):
    print(f"Analyzing {len(frames)} frames...")
    # TODO:
    # - Run MediaPipe Face Mesh + Iris on each frame
    # - Extract face presence, landmarks, blink status, pupil data per frame
    # - Aggregate: % face detected, avg emotion scores (if using FER), avg pupil proxy, total blinks
    # - Return aggregated features
    blink_count = np.random.randint(0, 5)              # Placeholder
    avg_pupil_proxy = np.random.rand()                 # Placeholder
    face_detected_ratio = np.random.rand()             # Placeholder
    avg_valence_proxy = (np.random.rand() - 0.5) * 2   # Placeholder [-1, 1]
    avg_arousal_proxy_face = np.random.rand()          # Placeholder [0, 1]
    return {
        "blink_count": blink_count,
        "avg_pupil_proxy": avg_pupil_proxy,
        "face_detected_ratio": face_detected_ratio,
        "avg_valence_proxy": avg_valence_proxy,
        "avg_arousal_proxy_face": avg_arousal_proxy_face,
    }
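
# --- Example: filling in the analyze_video_window TODO ----------------------
# A minimal sketch (not wired into the pipeline) of how blink counting, face
# presence and a pupil-size proxy could be computed with the Face Mesh instance
# above. The eye-landmark indices, the EAR threshold of 0.21 and the names
# `example_video_features` / `_eye_aspect_ratio` are illustrative assumptions,
# not values mandated by MediaPipe; valence/arousal proxies would still need a
# separate expression model (e.g. FER).
EYE_IDX_RIGHT = [33, 160, 158, 133, 153, 144]   # Assumed EAR points, right eye
EYE_IDX_LEFT = [362, 385, 387, 263, 373, 380]   # Assumed EAR points, left eye
EAR_BLINK_THRESHOLD = 0.21                      # Tunable assumption

def _eye_aspect_ratio(lm, idx):
    """EAR = (|p2-p6| + |p3-p5|) / (2 * |p1-p4|) on normalized landmark coords."""
    pts = np.array([[lm[i].x, lm[i].y] for i in idx])
    vertical = np.linalg.norm(pts[1] - pts[5]) + np.linalg.norm(pts[2] - pts[4])
    horizontal = np.linalg.norm(pts[0] - pts[3])
    return vertical / (2.0 * horizontal + 1e-9)

def example_video_features(frames):
    """Sketch: aggregate blink count, face-detected ratio and pupil proxy."""
    blink_count, frames_with_face = 0, 0
    pupil_proxies = []
    eye_was_closed = False
    for frame in frames:
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            continue
        frames_with_face += 1
        lm = results.multi_face_landmarks[0].landmark
        ear = 0.5 * (_eye_aspect_ratio(lm, EYE_IDX_RIGHT) +
                     _eye_aspect_ratio(lm, EYE_IDX_LEFT))
        eye_closed = ear < EAR_BLINK_THRESHOLD
        if eye_closed and not eye_was_closed:
            blink_count += 1  # Count the closing edge of each blink
        eye_was_closed = eye_closed
        # With refine_landmarks=True the mesh has 478 points; 468-477 are iris
        # landmarks. The horizontal extent of one iris serves as a crude,
        # uncalibrated pupil-size proxy.
        if len(lm) > 472:
            pupil_proxies.append(abs(lm[469].x - lm[471].x))
    return {
        "blink_count": blink_count,
        "face_detected_ratio": frames_with_face / max(len(frames), 1),
        "avg_pupil_proxy": float(np.mean(pupil_proxies)) if pupil_proxies else 0.0,
    }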

def analyze_audio_window(audio_chunks, timestamps):
    if not audio_chunks:
        return {"avg_rms": 0, "avg_pitch": 0}  # Default
    print(f"Analyzing {len(audio_chunks)} audio chunks...")
    # TODO:
    # - Concatenate chunks carefully based on timestamps / expected samples
    # - Run librosa: calculate RMS, pitch (e.g. pyin), maybe pauses
    # - Return aggregated features
    full_audio = np.concatenate(audio_chunks).ravel()  # Flatten to 1-D mono
    avg_rms = np.sqrt(np.mean(full_audio ** 2))  # Basic RMS
    # Pitch estimation can be computationally expensive:
    # f0, voiced_flag, voiced_probs = librosa.pyin(
    #     full_audio, fmin=librosa.note_to_hz('C2'),
    #     fmax=librosa.note_to_hz('C7'), sr=SAMPLE_RATE)
    # avg_pitch = np.nanmean(f0[voiced_flag]) if np.any(voiced_flag) else 0
    avg_pitch = np.random.randint(80, 300)  # Placeholder
    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}

def calculate_final_metrics(video_features, audio_features):
    # TODO: Combine features into the final 0-1 metrics.
    # This requires defining heuristics or a simple model based on the features.
    valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2  # Normalize [-1, 1] to [0, 1]

    # Combine multiple arousal indicators (weights are examples)
    arousal_face = video_features.get("avg_arousal_proxy_face", 0)
    arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0)  # Scale RMS
    arousal_pupil = video_features.get("avg_pupil_proxy", 0.5)  # Assuming pupil proxy is 0-1
    arousal = 0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil

    # Simple proxy; could add logic based on blink-rate deviations, gaze stability etc.
    engagement = video_features.get("face_detected_ratio", 0)

    # Stress based on negative valence and high arousal
    stress = max(0, (1.0 - valence) * arousal)  # Example heuristic

    # Cognitive load based on blink rate and pupil dilation
    blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
    norm_blink_rate = min(blink_rate, 1.0)  # Normalize to expected range (~0-1 Hz)
    cog_load = 0.5 * arousal_pupil + 0.5 * norm_blink_rate  # Example heuristic

    return {
        "Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
        "Valence": round(valence, 3),
        "Arousal": round(arousal, 3),
        "Engagement_Proxy": round(engagement, 3),
        "Stress_Proxy": round(stress, 3),
        "Cognitive_Load_Proxy": round(cog_load, 3),
        "Blink_Rate_Hz": round(blink_rate, 3),
        "Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3),
        # --- Exclude Traits ---
    }

def log_to_csv(filename, metrics_dict):
    file_exists = os.path.isfile(filename)
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
        if not file_exists:
            writer.writeheader()  # Write header only once
        writer.writerow(metrics_dict)

# --- Capture Threads (Simplified Example - Needs proper implementation) ---
video_active = True
audio_active = True

def video_capture_thread():
    cap = cv2.VideoCapture(0)
    while video_active:
        ret, frame = cap.read()
        if ret:
            ts = time.time()
            # Make copies to avoid issues if the buffer processes the frame later
            frame_buffer.append(frame.copy())
            frame_timestamps.append(ts)
        time.sleep(1 / 30.0)  # Limit capture rate
    cap.release()
    print("Video thread stopped.")

def audio_capture_callback(indata, frames, time_info, status):
    """Called (from a separate thread) for each audio block."""
    if status:
        print(status)
    ts = time.time()  # Timestamp the arrival of the chunk
    # Make copies to avoid issues if the buffer processes the chunk later
    audio_buffer.append(indata.copy())
    audio_timestamps.append(ts)  # Add timestamp for the chunk

def audio_capture_thread():
    with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS,
                        blocksize=AUDIO_BLOCK_SIZE, callback=audio_capture_callback):
        print("Audio stream started. Press Ctrl+C to stop.")
        while audio_active:
            sd.sleep(1000)  # Keep thread alive while the stream is running
    print("Audio thread stopped.")
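
# --- Example: lock-guarded timestamped buffer --------------------------------
# A minimal sketch of one way to address the "Needs thread safety - locks!"
# note in the main loop below. It is not wired into the pipeline: to use it,
# the frame/audio deques above would be replaced by instances of this class.
# The class and method names are illustrative, not part of any library API.
class TimestampedBuffer:
    """Bounded deque of (timestamp, item) pairs; appends and reads share a lock."""

    def __init__(self, maxlen):
        self._items = deque(maxlen=maxlen)
        self._lock = threading.Lock()

    def append(self, item, ts=None):
        with self._lock:
            self._items.append((ts if ts is not None else time.time(), item))

    def window(self, start_ts, end_ts):
        """Return items whose timestamps fall in [start_ts, end_ts)."""
        with self._lock:
            return [item for ts, item in self._items if start_ts <= ts < end_ts]

    def drop_before(self, cutoff_ts):
        """Discard items older than cutoff_ts (e.g. data already processed)."""
        with self._lock:
            self._items = deque((p for p in self._items if p[0] >= cutoff_ts),
                                maxlen=self._items.maxlen)

# Hypothetical usage:
#   frame_store = TimestampedBuffer(maxlen=int(BUFFER_DURATION_SECONDS * 30))
#   frame_store.append(frame.copy())                   # in the capture thread
#   frames_in_window = frame_store.window(start, end)  # in the main loop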

# --- Main Processing Logic ---
if __name__ == "__main__":
    print("Starting capture threads...")
    vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
    aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
    vid_thread.start()
    aud_thread.start()

    last_process_time = time.time()
    try:
        while True:
            current_time = time.time()
            if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
                print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
                window_end_time = current_time
                window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS

                # --- Get data for the window (Needs thread safety - locks!) ---
                # This part is tricky: efficiently select items in the timestamp range.
                # Simple non-thread-safe example:
                frames_in_window = [f for f, ts in zip(list(frame_buffer), list(frame_timestamps))
                                    if window_start_time <= ts < window_end_time]
                audio_in_window = [a for a, ts in zip(list(audio_buffer), list(audio_timestamps))
                                   if window_start_time <= ts < window_end_time]
                # In practice, you'd remove processed items from the buffer

                if not frames_in_window:
                    print("No frames in window, skipping.")
                    last_process_time = current_time  # Or += PROCESSING_INTERVAL_SECONDS
                    continue

                # --- Analyze ---
                video_features = analyze_video_window(frames_in_window, [])  # Pass timestamps if needed
                audio_features = analyze_audio_window(audio_in_window, [])   # Pass timestamps if needed

                # --- Calculate & Log ---
                final_metrics = calculate_final_metrics(video_features, audio_features)
                print("Calculated Metrics:", final_metrics)
                log_to_csv(CSV_FILENAME, final_metrics)

                last_process_time = current_time  # Reset timer accurately

            time.sleep(0.1)  # Prevent busy-waiting

    except KeyboardInterrupt:
        print("Stopping...")
        video_active = False
        audio_active = False
        # Wait for threads to finish
        vid_thread.join(timeout=2.0)
        # Audio thread stops when sd.sleep ends / the stream closes
        print("Done.")