ans123 committed on
Commit c93aa8f · verified · 1 Parent(s): d57a5c8

Update app.py

Files changed (1)
  1. app.py +192 -85
app.py CHANGED
@@ -1,94 +1,201 @@
  import cv2
  import time
  import csv
- import gradio as gr
- import numpy as np
- import sounddevice as sd
- import soundfile as sf
- from deepface import DeepFace
- import tempfile
- import os
- from scipy.io.wavfile import write as write_wav
-

- # === Audio recording config ===
  SAMPLE_RATE = 16000
- DURATION = 30  # seconds
-
-
- def record_audio(filename="audio_recording.wav"):
-     print("Recording audio for 30 seconds...")
-     audio = sd.rec(int(SAMPLE_RATE * DURATION), samplerate=SAMPLE_RATE, channels=1, dtype='int16')
-     sd.wait()
-     write_wav(filename, SAMPLE_RATE, audio)
-     return filename
-
-
- def map_emotions_to_metrics(emotion):
-     valence = emotion.get("happy", 0) / 100
-     arousal = (emotion.get("angry", 0) + emotion.get("surprise", 0)) / 200
-     stress_index = emotion.get("fear", 0) / 100
-     engagement_level = 1 - emotion.get("neutral", 0) / 100
-     dominance = (emotion.get("angry", 0) + emotion.get("disgust", 0)) / 200
-     return valence, arousal, dominance, stress_index, engagement_level
-
-
- def analyze_inputs():
      cap = cv2.VideoCapture(0)
-     start_time = time.time()
-     data = []
-
-     # Start audio recording in parallel
-     audio_file_path = os.path.join(tempfile.gettempdir(), "temp_audio.wav")
-     record_audio(audio_file_path)
-
-     for i in range(DURATION):
          ret, frame = cap.read()
-         if not ret:
-             continue
-
-         try:
-             result = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
-             emotion = result[0]['emotion']
-             valence, arousal, dominance, stress_index, engagement_level = map_emotions_to_metrics(emotion)
-
-             data.append([
-                 round(time.time() - start_time, 2),
-                 round(valence, 3),
-                 round(arousal, 3),
-                 round(dominance, 3),
-                 round(stress_index, 3),
-                 round(engagement_level, 3)
-             ])
-
-         except Exception as e:
-             print("Error analyzing frame:", e)
-         time.sleep(1)
-
      cap.release()
-
-     # Write to CSV
-     csv_path = os.path.join(tempfile.gettempdir(), "emotional_metrics.csv")
-     with open(csv_path, mode='w', newline='') as f:
-         writer = csv.writer(f)
-         writer.writerow(["timestamp", "valence", "arousal", "dominance", "stress_index", "engagement_level"])
-         writer.writerows(data)
-
-     return csv_path
-
-
- def demo_interface(campaign_text):
-     csv_output = analyze_inputs()
-     return csv_output
-
-
- demo = gr.Interface(
-     fn=demo_interface,
-     inputs=gr.Textbox(label="Enter Campaign Description", lines=2, placeholder="Describe your campaign..."),
-     outputs=gr.File(label="Download Emotional Metrics (CSV Output)"),
-     title="📊 Complex Campaign Emotion Response Engine",
-     description="This demo captures webcam and microphone input for 30 seconds. It analyzes facial expressions using DeepFace and records audio. The output is a downloadable CSV of emotional metrics over time."
- )
-
  if __name__ == "__main__":
-     demo.launch()

  import cv2
+ import sounddevice as sd
+ import mediapipe as mp
+ import numpy as np
+ import pandas as pd
+ import librosa
+ import threading
  import time
  import csv
+ from collections import deque

+ # --- Configuration ---
  SAMPLE_RATE = 16000
+ AUDIO_CHANNELS = 1
+ BUFFER_DURATION_SECONDS = 10  # Keep last 10s of data
+ PROCESSING_INTERVAL_SECONDS = 4.0
+ CSV_FILENAME = "metrics_log.csv"
+
+ # --- Buffers (use thread-safe versions if needed) ---
+ frame_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))  # Assuming ~30fps
+ audio_buffer = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))
+ frame_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * 30))
+ audio_timestamps = deque(maxlen=int(BUFFER_DURATION_SECONDS * SAMPLE_RATE))  # Timestamps per chunk
+
+ # --- MediaPipe Setup ---
+ mp_face_mesh = mp.solutions.face_mesh
+ mp_drawing = mp.solutions.drawing_utils
+ drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
+ face_mesh = mp_face_mesh.FaceMesh(
+     max_num_faces=1,
+     refine_landmarks=True,  # Crucial for iris/pupil
+     min_detection_confidence=0.5,
+     min_tracking_confidence=0.5)
+
+ # --- Placeholder Functions (Require detailed implementation) ---
+ def analyze_video_window(frames, timestamps):
+     print(f"Analyzing {len(frames)} frames...")
+     # TODO:
+     # - Run MediaPipe Face Mesh + Iris on each frame
+     # - Extract face presence, landmarks, blink status, pupil data per frame
+     # - Aggregate: % face detected, avg emotion scores (if using FER), avg pupil proxy, total blinks
+     # - Return aggregated features
+     blink_count = np.random.randint(0, 5)  # Placeholder
+     avg_pupil_proxy = np.random.rand()  # Placeholder
+     face_detected_ratio = np.random.rand()  # Placeholder
+     avg_valence_proxy = (np.random.rand() - 0.5) * 2  # Placeholder [-1, 1]
+     avg_arousal_proxy_face = np.random.rand()  # Placeholder [0, 1]
+     return {
+         "blink_count": blink_count,
+         "avg_pupil_proxy": avg_pupil_proxy,
+         "face_detected_ratio": face_detected_ratio,
+         "avg_valence_proxy": avg_valence_proxy,
+         "avg_arousal_proxy_face": avg_arousal_proxy_face
+     }
+
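One way the analyze_video_window TODO could be filled in, sketched below: per-frame eye openness (a simplified eye aspect ratio) and an iris-width pupil proxy from the module-level face_mesh, with blinks counted as open-to-closed transitions. The landmark indices are the commonly cited Face Mesh indices for the right eye and iris (the iris points exist only with refine_landmarks=True); the helper names and blink threshold are illustrative assumptions.

# Commonly cited Face Mesh indices for the right eye/iris (assumption).
R_EYE_CORNERS = (33, 133)   # inner/outer eye corners
R_EYE_LIDS = (159, 145)     # upper/lower lid midpoints
R_IRIS_EDGES = (469, 471)   # horizontal iris edge points (refine_landmarks=True)

def _norm_dist(lm, a, b):
    return np.hypot(lm[a].x - lm[b].x, lm[a].y - lm[b].y)

def frame_eye_features(frame_bgr):
    """Return (eye_openness, pupil_proxy) for one frame, or None if no face is found."""
    results = face_mesh.process(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
        return None
    lm = results.multi_face_landmarks[0].landmark
    eye_width = _norm_dist(lm, *R_EYE_CORNERS) + 1e-6
    openness = _norm_dist(lm, *R_EYE_LIDS) / eye_width       # low when the eye is shut
    pupil_proxy = _norm_dist(lm, *R_IRIS_EDGES) / eye_width  # scale-invariant iris width
    return openness, pupil_proxy

def count_blinks(openness_series, closed_threshold=0.18):
    """Count open-to-closed transitions; the threshold is hand-tuned."""
    blinks, closed = 0, False
    for value in openness_series:
        if value < closed_threshold and not closed:
            blinks, closed = blinks + 1, True
        elif value >= closed_threshold:
            closed = False
    return blinks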
+ def analyze_audio_window(audio_chunks, timestamps):
+     if not audio_chunks:
+         return {"avg_rms": 0, "avg_pitch": 0}  # Default
+     print(f"Analyzing {len(audio_chunks)} audio chunks...")
+     # TODO:
+     # - Concatenate chunks carefully based on timestamps / expected samples
+     # - Run librosa: calculate RMS, pitch (e.g., pyin), maybe pauses
+     # - Return aggregated features
+     full_audio = np.concatenate(audio_chunks)
+     avg_rms = np.sqrt(np.mean(full_audio**2))  # Basic RMS
+     # Pitch estimation can be computationally expensive:
+     # f0, voiced_flag, voiced_probs = librosa.pyin(full_audio.ravel(), fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=SAMPLE_RATE)
+     # avg_pitch = np.nanmean(f0) if f0 is not None and len(f0) > 0 else 0
+     avg_pitch = np.random.randint(80, 300)  # Placeholder
+     return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}
+
+
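A sketch of how that TODO could be completed with librosa, assuming the chunks are the float32 (n_samples, 1) arrays that the sounddevice callback below appends by default; the function name is illustrative, and pyin is the slow part (run once per window, which is affordable at a 4-second interval).

def audio_features_from_window(audio_chunks, sr=SAMPLE_RATE):
    """Aggregate loudness (RMS) and mean voiced pitch for one analysis window."""
    y = np.concatenate(audio_chunks, axis=0).astype(np.float32).ravel()  # mono signal
    avg_rms = float(np.mean(librosa.feature.rms(y=y)))
    # pyin returns per-frame f0 with NaN for unvoiced frames.
    f0, voiced_flag, voiced_prob = librosa.pyin(
        y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr)
    avg_pitch = float(np.nanmean(f0)) if np.any(np.isfinite(f0)) else 0.0
    return {"avg_rms": avg_rms, "avg_pitch": avg_pitch}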
+ def calculate_final_metrics(video_features, audio_features):
+     # TODO: Combine features into the final 0-1 metrics.
+     # This requires defining heuristics or a simple model based on the features.
+     valence = (video_features.get("avg_valence_proxy", 0) + 1) / 2  # Normalize [-1, 1] to [0, 1]
+
+     # Combine multiple arousal indicators (weights are examples)
+     arousal_face = video_features.get("avg_arousal_proxy_face", 0)
+     arousal_voice_rms = min(audio_features.get("avg_rms", 0) * 10, 1.0)  # Scale RMS
+     arousal_pupil = video_features.get("avg_pupil_proxy", 0.5)  # Assuming pupil proxy is 0-1
+     arousal = (0.4 * arousal_face + 0.3 * arousal_voice_rms + 0.3 * arousal_pupil)
+
+     engagement = video_features.get("face_detected_ratio", 0)  # Simple proxy
+     # Could add logic based on blink rate deviations, gaze stability etc.
+
+     # Stress based on negative valence combined with high arousal
+     stress = max(0, (1.0 - valence) * arousal)  # Example heuristic
+
+     # Cognitive load based on blink rate and pupil dilation
+     blink_rate = video_features.get("blink_count", 0) / PROCESSING_INTERVAL_SECONDS
+     # Normalize blink rate against an expected range (e.g. 0-1 Hz)
+     norm_blink_rate = min(blink_rate, 1.0)
+     cog_load = (0.5 * arousal_pupil + 0.5 * norm_blink_rate)  # Example heuristic
+
+     return {
+         "Timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
+         "Valence": round(valence, 3),
+         "Arousal": round(arousal, 3),
+         "Engagement_Proxy": round(engagement, 3),
+         "Stress_Proxy": round(stress, 3),
+         "Cognitive_Load_Proxy": round(cog_load, 3),
+         "Blink_Rate_Hz": round(blink_rate, 3),
+         "Pupil_Size_Proxy": round(video_features.get("avg_pupil_proxy", 0), 3)
+         # --- Exclude Traits ---
+     }
+
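To make the heuristic concrete, a worked example with made-up feature values (a 4-second window with avg_valence_proxy = 0.2, avg_arousal_proxy_face = 0.6, avg_rms = 0.05, avg_pupil_proxy = 0.5, face_detected_ratio = 0.9, blink_count = 2):

    valence    = (0.2 + 1) / 2                               = 0.600
    arousal    = 0.4*0.6 + 0.3*min(0.05*10, 1.0) + 0.3*0.5   = 0.540
    engagement = face_detected_ratio                         = 0.900
    stress     = max(0, (1 - 0.6) * 0.54)                    = 0.216
    blink_rate = 2 / 4 = 0.5 Hz, so cog_load = 0.5*0.5 + 0.5*0.5 = 0.500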
+ def log_to_csv(filename, metrics_dict):
+     file_exists = os.path.isfile(filename)
+     with open(filename, 'a', newline='') as csvfile:
+         writer = csv.DictWriter(csvfile, fieldnames=metrics_dict.keys())
+         if not file_exists:
+             writer.writeheader()  # Write header only once
+         writer.writerow(metrics_dict)
+
+ # --- Capture Threads (Simplified Example - Needs proper implementation) ---
+ video_active = True
+ audio_active = True
+
+ def video_capture_thread():
      cap = cv2.VideoCapture(0)
+     while video_active:
          ret, frame = cap.read()
+         if ret:
+             ts = time.time()
+             # Make copies to avoid issues if buffer processes frame later
+             frame_buffer.append(frame.copy())
+             frame_timestamps.append(ts)
+         time.sleep(1/30.0)  # Limit capture rate
      cap.release()
+     print("Video thread stopped.")
+
+ def audio_capture_callback(indata, frames, time_info, status):
+     """This is called (from a separate thread) for each audio block."""
+     if status:
+         print(status)
+     ts = time.time()  # Timestamp the arrival of the chunk
+     # Make copies to avoid issues if buffer processes chunk later
+     audio_buffer.append(indata.copy())
+     audio_timestamps.append(ts)  # Add timestamp for the chunk
+
+ def audio_capture_thread():
+     with sd.InputStream(samplerate=SAMPLE_RATE, channels=AUDIO_CHANNELS, callback=audio_capture_callback):
+         print("Audio stream started. Press Ctrl+C to stop.")
+         while audio_active:
+             sd.sleep(1000)  # Keep thread alive while stream is running
+     print("Audio thread stopped.")
+
+ # --- Main Processing Logic ---
+ import os
  if __name__ == "__main__":
+     print("Starting capture threads...")
+     vid_thread = threading.Thread(target=video_capture_thread, daemon=True)
+     aud_thread = threading.Thread(target=audio_capture_thread, daemon=True)
+     vid_thread.start()
+     aud_thread.start()
+
+     last_process_time = time.time()
+
+     try:
+         while True:
+             current_time = time.time()
+             if current_time - last_process_time >= PROCESSING_INTERVAL_SECONDS:
+                 print(f"\n--- Processing window ending {time.strftime('%H:%M:%S')} ---")
+                 window_end_time = current_time
+                 window_start_time = window_end_time - PROCESSING_INTERVAL_SECONDS
+
+                 # --- Get data for the window (Needs thread safety - locks!) ---
+                 # This part is tricky: efficiently select items in the timestamp range.
+                 # Simple non-thread-safe example:
+                 frames_in_window = [f for f, ts in zip(list(frame_buffer), list(frame_timestamps)) if window_start_time <= ts < window_end_time]
+                 audio_in_window = [a for a, ts in zip(list(audio_buffer), list(audio_timestamps)) if window_start_time <= ts < window_end_time]
+                 # In practice, you'd remove processed items from the buffer
+
+                 if not frames_in_window:
+                     print("No frames in window, skipping.")
+                     last_process_time = current_time  # Or += PROCESSING_INTERVAL_SECONDS
+                     continue
+
+                 # --- Analyze ---
+                 video_features = analyze_video_window(frames_in_window, [])  # Pass timestamps if needed
+                 audio_features = analyze_audio_window(audio_in_window, [])   # Pass timestamps if needed
+
+                 # --- Calculate & Log ---
+                 final_metrics = calculate_final_metrics(video_features, audio_features)
+                 print("Calculated Metrics:", final_metrics)
+                 log_to_csv(CSV_FILENAME, final_metrics)
+
+                 last_process_time = current_time  # Reset timer accurately
+
+             time.sleep(0.1)  # Prevent busy-waiting
+
+     except KeyboardInterrupt:
+         print("Stopping...")
+         video_active = False
+         audio_active = False
+         # Wait for threads to finish
+         vid_thread.join(timeout=2.0)
+         # Audio thread stops when sd.sleep ends or stream closes
+         print("Done.")