import csv
import os
import threading
import time
from collections import deque

import cv2
import librosa
import mediapipe as mp
import numpy as np
import pandas as pd
import sounddevice as sd

# --- Configuration ---
SAMPLE_RATE = 16000
AUDIO_CHANNELS = 1
BUFFER_DURATION_SECONDS = 10
PROCESSING_INTERVAL_SECONDS = 4.0
CSV_FILENAME = "metrics_log.csv"

# --- Shared rolling buffers (written by the capture threads, read by the main loop) ---
ASSUMED_FPS = 30  # rough webcam frame rate used to size the frame buffers
frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * ASSUMED_FPS))
frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * ASSUMED_FPS))
# Note: each audio entry is a whole callback block, not a single sample,
# so a sample-count maxlen is a very generous upper bound here.
audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))
audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))

# --- MediaPipe FaceMesh setup ---
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
# refine_landmarks=True adds the iris landmarks needed for a pupil-size proxy.
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

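# A minimal sketch (not called by the placeholder pipeline below) of how the
# face_mesh object above would be applied per frame. The helper name
# get_face_landmarks is illustrative, not part of the original script.
def get_face_landmarks(frame_bgr):
    """Run FaceMesh on one BGR frame; return its landmark list, or None."""
    results = face_mesh.process(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    if results.multi_face_landmarks:
        return results.multi_face_landmarks[0].landmark
    return None
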
def analyze_video_window(frames, timestamps):
    print(f"Analyzing {len(frames)} frames...")
    # Placeholder analysis: random values stand in for real per-frame FaceMesh
    # processing (blink detection, iris measurement, expression proxies) until
    # those stages are implemented.
    blink_count = np.random.randint(0, 5)
    avg_pupil_proxy = np.random.rand()
    face_detected_ratio = np.random.rand()
    avg_valence_proxy = (np.random.rand() - 0.5) * 2  # in [-1, 1]
    avg_arousal_proxy_face = np.random.rand()
    return {
        "blink_count": blink_count,
        "avg_pupil_proxy": avg_pupil_proxy,
        "face_detected_ratio": face_detected_ratio,
        "avg_valence_proxy": avg_valence_proxy,
        "avg_arousal_proxy_face": avg_arousal_proxy_face
    }

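# A minimal sketch of replacing the random blink_count with eye-aspect-ratio
# (EAR) blink detection. The landmark indices are the commonly used FaceMesh
# right-eye contour points; EAR_THRESHOLD is an assumed starting value that
# needs tuning per camera and subject (landmark coordinates are normalized,
# so scale by frame size for a stricter ratio).
RIGHT_EYE = [33, 160, 158, 133, 153, 144]  # p1..p6 in the EAR formula
EAR_THRESHOLD = 0.21

def eye_aspect_ratio(landmarks, eye_indices=RIGHT_EYE):
    """EAR = (|p2-p6| + |p3-p5|) / (2|p1-p4|); it drops sharply on a blink."""
    pts = np.array([[landmarks[i].x, landmarks[i].y] for i in eye_indices])
    vertical = np.linalg.norm(pts[1] - pts[5]) + np.linalg.norm(pts[2] - pts[4])
    horizontal = np.linalg.norm(pts[0] - pts[3])
    return vertical / (2.0 * horizontal)

def count_blinks(ear_series, threshold=EAR_THRESHOLD):
    """Count closed-to-open transitions in a per-frame EAR sequence."""
    closed = [ear < threshold for ear in ear_series]
    return sum(1 for a, b in zip(closed, closed[1:]) if a and not b)
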
def analyze_audio_window(audio_chunks, timestamps):
    if not audio_chunks:
        return {"avg_rms": 0, "avg_pitch": 0}
    print(f"Analyzing {len(audio_chunks)} audio chunks...")
    # Flatten the callback blocks into one mono signal before analysis.
    full_audio = np.concatenate(audio_chunks).ravel()
    avg_rms = np.sqrt(np.mean(full_audio**2))
    # Placeholder: a random value stands in for real pitch tracking.
    avg_pitch = np.random.randint(80, 300)
    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}

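# A minimal sketch of replacing the random avg_pitch with librosa's pYIN
# tracker (librosa is imported above but otherwise unused). pyin returns NaN
# for unvoiced frames, so only voiced frames are averaged; the fmin/fmax
# bounds are assumptions covering typical speech.
def estimate_mean_pitch(audio, sr=SAMPLE_RATE):
    f0, voiced_flag, _ = librosa.pyin(
        audio.astype(np.float32),
        fmin=librosa.note_to_hz('C2'),  # ~65 Hz
        fmax=librosa.note_to_hz('C6'),  # ~1047 Hz
        sr=sr)
    voiced = f0[voiced_flag]
    return float(np.mean(voiced)) if voiced.size else 0.0
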
def calculate_final_metrics(video_features, audio_features):
    # Map the valence proxy from [-1, 1] to [0, 1].
    valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2

    # Fuse three arousal cues with heuristic weights.
    arousal_face = video_features.get("avg_arousal_proxy_face", 0)
    arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0)
    arousal_pupil = video_features.get("avg_pupil_proxy", 0.5)
    arousal = (0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil)

    # Engagement proxy: how often a face was visible in the window.
    engagement = video_features.get("face_detected_ratio", 0)

    # Stress proxy: high arousal combined with negative valence.
    stress = max(0, (1.0 - valence) * arousal)

    # Cognitive-load proxy: pupil size plus blink rate, capped at 1 blink/s.
    blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
    norm_blink_rate = min(blink_rate, 1.0)
    cog_load = (0.5 * arousal_pupil + 0.5 * norm_blink_rate)

    return {
        "Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
        "Valence": round(valence, 3),
        "Arousal": round(arousal, 3),
        "Engagement_Proxy": round(engagement, 3),
        "Stress_Proxy": round(stress, 3),
        "Cognitive_Load_Proxy": round(cog_load, 3),
        "Blink_Rate_Hz": round(blink_rate, 3),
        "Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3)
    }

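# Worked example of the fusion above (illustrative numbers): a raw valence
# proxy of 0.2 maps to valence (0.2 + 1) / 2 = 0.6; with a fused arousal of
# 0.5, stress = (1 - 0.6) * 0.5 = 0.2.
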
def log_to_csv(filename, metrics_dict):
    # Write the header only when creating the file for the first time.
    file_exists = os.path.isfile(filename)
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
        if not file_exists:
            writer.writeheader()
        writer.writerow(metrics_dict)

# --- Capture thread control flags ---
video_active = True
audio_active = True

def video_capture_thread():
    cap = cv2.VideoCapture(0)
    while video_active:
        ret, frame = cap.read()
        if ret:
            ts = time.time()
            frame_buffer.append(frame.copy())
            frame_timestamps.append(ts)
        time.sleep(1 / ASSUMED_FPS)  # pace capture at roughly the assumed FPS
    cap.release()
    print("Video thread stopped.")

def audio_capture_callback(indata, frames, time_info, status):
    """Called by sounddevice (from a separate thread) for each audio block."""
    if status:
        print(status)
    ts = time.time()
    audio_buffer.append(indata.copy())
    audio_timestamps.append(ts)

def audio_capture_thread():
    with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS,
                        callback=audio_capture_callback):
        print("Audio stream started. Press Ctrl+C to stop.")
        while audio_active:
            sd.sleep(1000)
    print("Audio thread stopped.")

if __name__ == "__main__":
    print("Starting capture threads...")
    vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
    aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
    vid_thread.start()
    aud_thread.start()

    last_process_time = time.time()

    try:
        while True:
            current_time = time.time()
            if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
                print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
                window_end_time = current_time
                window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS

                # Snapshot the deques before filtering so the capture threads
                # cannot mutate them mid-iteration.
                frame_pairs = [(f, ts) for f, ts in zip(list(frame_buffer), list(frame_timestamps))
                               if window_start_time <= ts < window_end_time]
                audio_pairs = [(a, ts) for a, ts in zip(list(audio_buffer), list(audio_timestamps))
                               if window_start_time <= ts < window_end_time]

                if not frame_pairs:
                    print("No frames in window, skipping.")
                    last_process_time = current_time
                    continue

                # Pass the matching timestamps along with the data.
                video_features = analyze_video_window(
                    [f for f, _ in frame_pairs], [ts for _, ts in frame_pairs])
                audio_features = analyze_audio_window(
                    [a for a, _ in audio_pairs], [ts for _, ts in audio_pairs])

                final_metrics = calculate_final_metrics(video_features, audio_features)
                print("Calculated Metrics:", final_metrics)
                log_to_csv(CSV_FILENAME, final_metrics)

                last_process_time = current_time

            time.sleep(0.1)

    except KeyboardInterrupt:
        print("Stopping...")
        video_active = False
        audio_active = False

        vid_thread.join(timeout=2.0)
        aud_thread.join(timeout=2.0)

        print("Done.")