Update app.py
app.py CHANGED
@@ -1,94 +1,201 @@
 import cv2
 import time
 import csv
-
-import numpy as np
-import sounddevice as sd
-import soundfile as sf
-from deepface import DeepFace
-import tempfile
-import os
-from scipy.io.wavfile import write as write_wav
-
 
-#
 SAMPLE_RATE = 16000
 cap = cv2.VideoCapture(0)
-
-data = []
-
-# Start audio recording in parallel
-audio_file_path = os.path.join(tempfile.gettempdir(), "temp_audio.wav")
-record_audio(audio_file_path)
-
-for i in range(DURATION):
     ret, frame = cap.read()
-    if
-        valence, arousal, dominance, stress_index, engagement_level = map_emotions_to_metrics(emotion)
-
-        data.append([
-            round(time.time() - start_time, 2),
-            round(valence, 3),
-            round(arousal, 3),
-            round(dominance, 3),
-            round(stress_index, 3),
-            round(engagement_level, 3)
-        ])
-
-    except Exception as e:
-        print("Error analyzing frame:", e)
-    time.sleep(1)
-
 cap.release()
-
-def
-    title="📊 Complex Campaign Emotion Response Engine",
-    description="This demo captures webcam and microphone input for 30 seconds. It analyzes facial expressions using DeepFace and records audio. The output is a downloadable CSV of emotional metrics over time."
-)
-
 if __name__ == "__main__":
-
 import cv2
+import sounddevice as sd
+import mediapipe as mp
+import numpy as np
+import pandas as pd
+import librosa
+import threading
 import time
 import csv
+from collections import deque
 
+# --- Configuration ---
 SAMPLE_RATE = 16000
+AUDIO_CHANNELS = 1
+BUFFER_DURATION_SECONDS = 10  # Keep last 10s of data
+PROCESSING_INTERVAL_SECONDS = 4.0
+CSV_FILENAME = "metrics_log.csv"
+
+# --- Buffers (use thread-safe versions if needed) ---
+frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))  # Assuming ~30fps
+audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))
+frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))
+audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))  # Timestamps per chunk
+
+# --- MediaPipe Setup ---
+mp_face_mesh = mp.solutions.face_mesh
+mp_drawing = mp.solutions.drawing_utils
+drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
+face_mesh = mp_face_mesh.FaceMesh(
+    max_num_faces=1,
+    refine_landmarks=True,  # Crucial for iris/pupil
+    min_detection_confidence=0.5,
+    min_tracking_confidence=0.5)
+
+# --- Placeholder Functions (Requires detailed implementation) ---
+def analyze_video_window(frames, timestamps):
+    print(f"Analyzing {len(frames)} frames...")
+    # TODO:
+    # - Run MediaPipe Face Mesh + Iris on each frame
+    # - Extract face presence, landmarks, blink status, pupil data per frame
+    # - Aggregate: % face detected, avg emotion scores (if using FER), avg pupil proxy, total blinks
+    # - Return aggregated features
+    blink_count = np.random.randint(0, 5)  # Placeholder
+    avg_pupil_proxy = np.random.rand()  # Placeholder
+    face_detected_ratio = np.random.rand()  # Placeholder
+    avg_valence_proxy = (np.random.rand() - 0.5) * 2  # Placeholder [-1, 1]
+    avg_arousal_proxy_face = np.random.rand()  # Placeholder [0, 1]
+    return {
+        "blink_count": blink_count,
+        "avg_pupil_proxy": avg_pupil_proxy,
+        "face_detected_ratio": face_detected_ratio,
+        "avg_valence_proxy": avg_valence_proxy,
+        "avg_arousal_proxy_face": avg_arousal_proxy_face
+    }
+
+def analyze_audio_window(audio_chunks, timestamps):
+    if not audio_chunks:
+        return {"avg_rms": 0, "avg_pitch": 0}  # Default
+    print(f"Analyzing {len(audio_chunks)} audio chunks...")
+    # TODO:
+    # - Concatenate chunks carefully based on timestamps / expected samples
+    # - Run librosa: calculate RMS, pitch (e.g., pyin), maybe pauses
+    # - Return aggregated features
+    full_audio = np.concatenate(audio_chunks)
+    avg_rms = np.sqrt(np.mean(full_audio**2))  # Basic RMS
+    # Pitch estimation can be computationally expensive
+    # pitches, magnitudes = librosa.pyin(full_audio, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=SAMPLE_RATE)
+    # avg_pitch = np.nanmean(pitches) if pitches is not None and len(pitches) > 0 else 0
+    avg_pitch = np.random.randint(80, 300)  # Placeholder
+    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}
+
+
+def calculate_final_metrics(video_features, audio_features):
+    # TODO: Combine features into the final 0-1 metrics
+    # This requires defining heuristics or a simple model based on the features
+    valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2  # Normalize [-1, 1] to [0, 1]
+
+    # Combine multiple arousal indicators (weights are examples)
+    arousal_face = video_features.get("avg_arousal_proxy_face", 0)
+    arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0)  # Scale RMS
+    arousal_pupil = video_features.get("avg_pupil_proxy", 0.5)  # Assuming pupil proxy is 0-1
+    arousal = (0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil)
+
+    engagement = video_features.get("face_detected_ratio", 0)  # Simple proxy
+    # Could add logic based on blink rate deviations, gaze stability etc.
+
+    # Stress based on neg valence, high arousal
+    stress = max(0, (1.0 - valence) * arousal)  # Example heuristic
+
+    # Cog load based on blink rate, pupil dilation
+    blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
+    # Normalize blink rate based on expected range (e.g. 0-1 Hz)
+    norm_blink_rate = min(blink_rate, 1.0)
+    cog_load = (0.5 * arousal_pupil + 0.5 * norm_blink_rate)  # Example heuristic
+
+    return {
+        "Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
+        "Valence": round(valence, 3),
+        "Arousal": round(arousal, 3),
+        "Engagement_Proxy": round(engagement, 3),
+        "Stress_Proxy": round(stress, 3),
+        "Cognitive_Load_Proxy": round(cog_load, 3),
+        "Blink_Rate_Hz": round(blink_rate, 3),
+        "Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3)
+        # --- Exclude Traits ---
+    }
+
+def log_to_csv(filename, metrics_dict):
+    file_exists = os.path.isfile(filename)
+    with open(filename, 'a', newline='') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
+        if not file_exists:
+            writer.writeheader()  # Write header only once
+        writer.writerow(metrics_dict)
+
+# --- Capture Threads (Simplified Example - Needs proper implementation) ---
+video_active = True
+audio_active = True
+
+def video_capture_thread():
     cap = cv2.VideoCapture(0)
+    while video_active:
         ret, frame = cap.read()
+        if ret:
+            ts = time.time()
+            # Make copies to avoid issues if buffer processes frame later
+            frame_buffer.append(frame.copy())
+            frame_timestamps.append(ts)
+        time.sleep(1/30.0)  # Limit capture rate
     cap.release()
+    print("Video thread stopped.")
+
+def audio_capture_callback(indata, frames, time_info, status):
+    """This is called (from a separate thread) for each audio block."""
+    if status:
+        print(status)
+    ts = time.time()  # Timestamp the arrival of the chunk
+    # Make copies to avoid issues if buffer processes chunk later
+    audio_buffer.append(indata.copy())
+    audio_timestamps.append(ts)  # Add timestamp for the chunk
+
+def audio_capture_thread():
+    with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS, callback=audio_capture_callback):
+        print("Audio stream started. Press Ctrl+C to stop.")
+        while audio_active:
+            sd.sleep(1000)  # Keep thread alive while stream is running
+    print("Audio thread stopped.")
+
+# --- Main Processing Logic ---
+import os
 if __name__ == "__main__":
+    print("Starting capture threads...")
+    vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
+    aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
+    vid_thread.start()
+    aud_thread.start()
+
+    last_process_time = time.time()
+
+    try:
+        while True:
+            current_time = time.time()
+            if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
+                print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
+                window_end_time = current_time
+                window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS
+
+                # --- Get data for the window (Needs thread safety - locks!) ---
+                # This part is tricky: efficiently select items in the timestamp range
+                # Simple non-thread-safe example:
+                frames_in_window = [f for f, ts in zip(list(frame_buffer), list(frame_timestamps)) if window_start_time <= ts < window_end_time]
+                audio_in_window = [a for a, ts in zip(list(audio_buffer), list(audio_timestamps)) if window_start_time <= ts < window_end_time]
+                # In practice, you'd remove processed items from the buffer
+
+                if not frames_in_window:
+                    print("No frames in window, skipping.")
+                    last_process_time = current_time  # Or += PROCESSING_INTERVAL_SECONDS
+                    continue
+
+                # --- Analyze ---
+                video_features = analyze_video_window(frames_in_window, [])  # Pass timestamps if needed
+                audio_features = analyze_audio_window(audio_in_window, [])  # Pass timestamps if needed
+
+                # --- Calculate & Log ---
+                final_metrics = calculate_final_metrics(video_features, audio_features)
+                print("Calculated Metrics:", final_metrics)
+                log_to_csv(CSV_FILENAME, final_metrics)
+
+                last_process_time = current_time  # Reset timer accurately
+
+            time.sleep(0.1)  # Prevent busy-waiting
+
+    except KeyboardInterrupt:
+        print("Stopping...")
+        video_active = False
+        audio_active = False
+        # Wait for threads to finish
+        vid_thread.join(timeout=2.0)
+        # Audio thread stops when sd.sleep ends or stream closes
+        print("Done.")
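
The TODO block in `analyze_video_window` leaves the per-frame landmark work open. Below is a minimal sketch of one way it could be filled in, using the Eye Aspect Ratio (EAR) idea for blink counting and iris size relative to the inter-ocular distance as a crude pupil proxy. It assumes the `face_mesh` object created above with `refine_landmarks=True`; the landmark indices, the 0.21 EAR threshold, and the helper names (`_px`, `landmark_pass`) are illustrative choices, not part of this commit.

```python
import cv2
import numpy as np

# Commonly used Face Mesh indices (assumed, not from the commit):
EYE_CORNERS = (33, 133)            # Horizontal corners of one eye
EYE_LIDS = (159, 145)              # Upper / lower lid of the same eye
IRIS = (468, 469, 470, 471, 472)   # Iris center + 4 rim points (refine_landmarks=True)
OUTER_CORNERS = (33, 263)          # Outer corners of both eyes, used for scale
EAR_BLINK_THRESHOLD = 0.21         # Assumed threshold; tune per camera/user

def _px(lm, i, w, h):
    """Landmark i in pixel coordinates."""
    return np.array([lm[i].x * w, lm[i].y * h])

def eye_aspect_ratio(lm, w, h):
    """Rough EAR: eyelid gap over eye width (single vertical pair)."""
    gap = np.linalg.norm(_px(lm, EYE_LIDS[0], w, h) - _px(lm, EYE_LIDS[1], w, h))
    width = np.linalg.norm(_px(lm, EYE_CORNERS[0], w, h) - _px(lm, EYE_CORNERS[1], w, h)) + 1e-6
    return gap / width

def iris_size_proxy(lm, w, h):
    """Iris radius over inter-ocular distance: a crude stand-in for pupil size."""
    center = _px(lm, IRIS[0], w, h)
    radius = np.mean([np.linalg.norm(_px(lm, i, w, h) - center) for i in IRIS[1:]])
    scale = np.linalg.norm(_px(lm, OUTER_CORNERS[0], w, h) - _px(lm, OUTER_CORNERS[1], w, h)) + 1e-6
    return float(radius / scale)

def landmark_pass(frames, face_mesh):
    """Aggregate blink count, face-detected ratio and pupil proxy over a window."""
    blink_count, face_frames, pupil_vals = 0, 0, []
    eye_closed = False
    for frame in frames:
        h, w = frame.shape[:2]
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            eye_closed = False
            continue
        face_frames += 1
        lm = results.multi_face_landmarks[0].landmark
        if eye_aspect_ratio(lm, w, h) < EAR_BLINK_THRESHOLD:
            if not eye_closed:          # Falling edge counts as one blink
                blink_count += 1
            eye_closed = True
        else:
            eye_closed = False
        pupil_vals.append(iris_size_proxy(lm, w, h))
    return {
        "blink_count": blink_count,
        "face_detected_ratio": face_frames / max(len(frames), 1),
        "avg_pupil_proxy": float(np.mean(pupil_vals)) if pupil_vals else 0.0,
    }
```

A fuller implementation would likely average both eyes and smooth the EAR signal over a few frames before thresholding.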
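For the `analyze_audio_window` TODO, the librosa side could look roughly like the sketch below. Note that `librosa.pyin` returns `(f0, voiced_flag, voiced_prob)` rather than the `(pitches, magnitudes)` pair suggested by the commented-out line, and `f0` is NaN on unvoiced frames. The function name and the `fmin`/`fmax` range here are assumptions.

```python
import numpy as np
import librosa

SAMPLE_RATE = 16000

def audio_features(audio_chunks, sr=SAMPLE_RATE):
    """Hypothetical filling-in of analyze_audio_window."""
    if not audio_chunks:
        return {"avg_rms": 0.0, "avg_pitch": 0.0}
    # sounddevice delivers (n_samples, 1) blocks with AUDIO_CHANNELS = 1; flatten to 1-D
    y = np.concatenate([np.asarray(c).reshape(-1) for c in audio_chunks]).astype(np.float32)

    # Frame-wise RMS energy, averaged over the window
    avg_rms = float(np.mean(librosa.feature.rms(y=y)))

    # pyin returns (f0, voiced_flag, voiced_prob); f0 is NaN on unvoiced frames
    f0, voiced_flag, voiced_prob = librosa.pyin(
        y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr)
    voiced = f0[~np.isnan(f0)]
    avg_pitch = float(np.mean(voiced)) if voiced.size else 0.0

    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}
```

Since pyin is relatively slow, running it only on every Nth window is one way to keep the 4-second processing interval comfortable.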
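The main loop's comment flags that the window selection "Needs thread safety - locks!". One possible pattern is a single `threading.Lock` per buffer/timestamp pair, with the processing loop draining items up to the window end while holding the lock; the names `buffer_lock`, `append_frame`, and `drain_window` below are illustrative additions, not part of the commit.

```python
import threading
from collections import deque

buffer_lock = threading.Lock()
frame_buffer = deque()
frame_timestamps = deque()

def append_frame(frame, ts):
    """Called from the capture thread for every grabbed frame."""
    with buffer_lock:
        frame_buffer.append(frame)
        frame_timestamps.append(ts)

def drain_window(start_ts, end_ts):
    """Called from the processing loop: pop everything before end_ts and
    return only the items whose timestamp falls in [start_ts, end_ts)."""
    window = []
    with buffer_lock:
        while frame_timestamps and frame_timestamps[0] < end_ts:
            ts = frame_timestamps.popleft()
            frame = frame_buffer.popleft()
            if ts >= start_ts:
                window.append((ts, frame))
    return window
```

Draining (rather than copying) also covers the "remove processed items from the buffer" note in the loop and keeps memory bounded without relying solely on the deque maxlen.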