Update app.py
app.py
CHANGED
@@ -18,9 +18,7 @@ from deepface import DeepFace
 import base64
 import io
 from pathlib import Path
-import torch
-from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
-from io import BytesIO
+import traceback
 
 # Suppress warnings for cleaner output
 warnings.filterwarnings('ignore')
@@ -40,8 +38,8 @@ try:
         raise ValueError("GOOGLE_API_KEY environment variable not set.")
 
     genai.configure(api_key=GOOGLE_API_KEY)
-    # Use gemini-
-    model = genai.GenerativeModel('gemini-
+    # Use gemini-1.5-flash for quick responses
+    model = genai.GenerativeModel('gemini-1.5-flash')
     GEMINI_ENABLED = True
     print("Google Gemini API configured successfully.")
 except Exception as e:
@@ -49,45 +47,6 @@ except Exception as e:
     print("Running with simulated Gemini API responses.")
     GEMINI_ENABLED = False
 
-# --- Initialize LLaVA Vision Model ---
-print("Initializing LLaVA Vision Model...")
-LLAVA_ENABLED = False
-try:
-    # Check if GPU is available
-    if torch.cuda.is_available():
-        device = "cuda"
-    else:
-        device = "cpu"
-
-    # Use a smaller LLaVA model for better performance
-    model_id = "llava-hf/llava-1.5-7b-hf"
-
-    # Initialize the model
-    processor = AutoProcessor.from_pretrained(model_id)
-    llava_model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        low_cpu_mem_usage=True if device == "cuda" else False,
-    ).to(device)
-
-    # Create a pipeline
-    vision_llm = pipeline(
-        "image-to-text",
-        model=llava_model,
-        tokenizer=processor.tokenizer,
-        image_processor=processor.image_processor,
-        device=device,
-        max_new_tokens=512,
-    )
-
-    LLAVA_ENABLED = True
-    print(f"LLaVA Vision Model initialized successfully on {device.upper()}")
-
-except Exception as e:
-    print(f"WARNING: Failed to initialize LLaVA Vision Model: {e}")
-    print("Running with DeepFace only (no LLaVA vision features).")
-    vision_llm = None
-
 # --- Initialize OpenCV face detector for backup ---
 print("Initializing OpenCV face detector...")
 try:
@@ -123,63 +82,10 @@ emotion_mapping = {
 }
 
 ad_context_columns = ["ad_description", "ad_detail", "ad_type", "gemini_ad_analysis"]
-user_state_columns = ["user_state", "enhanced_user_state"
+user_state_columns = ["user_state", "enhanced_user_state"]
 all_columns = ['timestamp', 'frame_number'] + metrics + ad_context_columns + user_state_columns
 initial_metrics_df = pd.DataFrame(columns=all_columns)
 
-# --- LLaVA Vision Analysis Function ---
-def analyze_image_with_llava(image, ad_context=None):
-    """
-    Use LLaVA vision model to analyze facial expression and emotion in image
-    """
-    if not LLAVA_ENABLED or vision_llm is None or image is None:
-        return "LLaVA analysis not available"
-
-    try:
-        # Convert OpenCV image (BGR) to PIL Image (RGB)
-        if len(image.shape) == 3 and image.shape[2] == 3:
-            # Check if BGR and convert to RGB if needed
-            if np.mean(image[:,:,0]) < np.mean(image[:,:,2]):  # Rough BGR check
-                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-            else:
-                image_rgb = image
-        else:
-            # Handle grayscale or other formats
-            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-
-        # Convert to PIL Image
-        pil_image = Image.fromarray(image_rgb)
-
-        # Create prompt based on ad context
-        ad_info = ""
-        if ad_context:
-            ad_desc = ad_context.get('ad_description', '')
-            ad_type = ad_context.get('ad_type', '')
-            if ad_desc:
-                ad_info = f" while watching an ad about {ad_desc} (type: {ad_type})"
-
-        prompt = f"""Analyze this person's facial expression and emotion{ad_info}.
-        Describe their emotional state, engagement level, and cognitive state in detail.
-        Focus on: valence (positive/negative emotion), arousal (excitement level),
-        attention, stress indicators, and overall reaction to what they're seeing.
-        """
-
-        # Process with Vision LLM
-        outputs = vision_llm(pil_image, prompt=prompt)
-
-        # Extract the generated text
-        if isinstance(outputs, list) and len(outputs) > 0:
-            if isinstance(outputs[0], dict) and "generated_text" in outputs[0]:
-                return outputs[0]["generated_text"]
-            elif isinstance(outputs[0], str):
-                return outputs[0]
-
-        return str(outputs) if outputs else "No results from LLaVA analysis"
-
-    except Exception as e:
-        print(f"Error in LLaVA analysis: {e}")
-        return f"LLaVA analysis error: {str(e)}"
-
 # --- Gemini API Functions ---
 def call_gemini_api_for_ad(description, detail, ad_type):
     """
@@ -212,12 +118,12 @@ def call_gemini_api_for_ad(description, detail, ad_type):
         print(f"Error calling Gemini for ad context: {e}")
         return f"Error analyzing ad context: {str(e)}"
 
-def interpret_metrics_with_gemini(metrics_dict, deepface_results=None,
+def interpret_metrics_with_gemini(metrics_dict, deepface_results=None, ad_context=None):
     """
-    Uses Google Gemini to interpret facial metrics
+    Uses Google Gemini to interpret facial metrics and DeepFace results
     to determine user state.
     """
-    if not metrics_dict and not deepface_results
+    if not metrics_dict and not deepface_results:
         return "No metrics", "No facial data detected"
 
     if not GEMINI_ENABLED:
@@ -239,10 +145,6 @@ def interpret_metrics_with_gemini(metrics_dict, deepface_results=None, llava_ana
             state = "Stressed, Negative"
 
         enhanced_state = f"The viewer appears {state.lower()} while watching this content."
-        if llava_analysis and llava_analysis != "LLaVA analysis not available":
-            # Extract a brief summary from LLaVA analysis (first sentence)
-            first_sentence = llava_analysis.split('.')[0] + '.'
-            enhanced_state += f" {first_sentence}"
 
         return state, enhanced_state
     else:
@@ -259,11 +161,6 @@ def interpret_metrics_with_gemini(metrics_dict, deepface_results=None, llava_ana
             emotion_dict = deepface_results["emotion"]
             deepface_formatted = "\nDeepFace emotions:\n" + "\n".join([f"- {k.title()}: {v:.2f}" for k, v in emotion_dict.items()])
 
-        # Format LLaVA analysis
-        llava_formatted = ""
-        if llava_analysis and llava_analysis != "LLaVA analysis not available":
-            llava_formatted = f"\nLLaVA Vision Analysis:\n{llava_analysis}"
-
         # Include ad context if available
         ad_info = ""
         if ad_context:
@@ -274,7 +171,7 @@ def interpret_metrics_with_gemini(metrics_dict, deepface_results=None, llava_ana
         prompt = f"""
         Analyze the facial expression and emotion of a person watching an advertisement{ad_info}.
 
-        Use these combined inputs:{metrics_formatted}{deepface_formatted}
+        Use these combined inputs:{metrics_formatted}{deepface_formatted}
 
         Provide two outputs:
         1. User State: A short 1-3 word description of their emotional/cognitive state
@@ -303,6 +200,7 @@ def interpret_metrics_with_gemini(metrics_dict, deepface_results=None, llava_ana
 
     except Exception as e:
         print(f"Error calling Gemini for metric interpretation: {e}")
+        traceback.print_exc()
         return "Error", f"Error analyzing facial metrics: {str(e)}"
 
 # --- DeepFace Analysis Function ---
@@ -330,7 +228,7 @@ def analyze_face_with_deepface(image):
         # Analyze with DeepFace
         analysis = DeepFace.analyze(
             img_path=temp_img,
-            actions=['emotion'
+            actions=['emotion'],
             enforce_detection=False,  # Don't throw error if face not detected
             detector_backend='opencv'  # Faster detection
         )
@@ -422,16 +320,6 @@ def calculate_metrics_from_deepface(deepface_results, ad_context=None):
         arsl += 0.1
         dom -= 0.1
 
-    # Adjust for gender and age if available (just examples of potential factors)
-    if "gender" in deepface_results:
-        gender = deepface_results["gender"]
-        gender_score = deepface_results.get("gender_score", 0.5)
-        # No real adjustment needed, this is just an example
-
-    if "age" in deepface_results:
-        age = deepface_results["age"]
-        # No real adjustment needed, this is just an example
-
     # Illustrative Context Adjustments from ad
     ad_type = ad_context.get('ad_type', 'Unknown')
     gem_txt = str(ad_context.get('gemini_ad_analysis', '')).lower()
@@ -682,16 +570,10 @@ def process_video_file(
         if not deepface_results or "region" not in deepface_results:
             face_data = detect_face_opencv(video_file)
 
-        # Use LLaVA for additional analysis (once per frame)
-        llava_analysis = "LLaVA analysis not available"
-        if face_data is not None or (deepface_results and "region" in deepface_results):
-            # Only use LLaVA if a face was detected
-            llava_analysis = analyze_image_with_llava(video_file, ad_context)
-
         # Calculate metrics if face detected
         if deepface_results or face_data:
             calculated_metrics = calculate_metrics_from_deepface(deepface_results, ad_context)
-            user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, deepface_results,
+            user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, deepface_results, ad_context)
 
             # Create a row for the dataframe
             row = {
@@ -700,8 +582,7 @@ def process_video_file(
                 **calculated_metrics,
                 **ad_context,
                 'user_state': user_state,
-                'enhanced_user_state': enhanced_state
-                'llava_analysis': llava_analysis
+                'enhanced_user_state': enhanced_state
             }
             metrics_data.append(row)
 
@@ -742,13 +623,10 @@ def process_video_file(
     metrics_data = []
     processed_frames = []
     frame_count = 0
-    llava_counter = 0  # To limit LLaVA analysis (it's slow)
-    llava_interval = sampling_rate * 10  # Run LLaVA every X frames
 
     if show_progress:
         print(f"Processing video with {total_frames} frames at {fps} FPS")
         print(f"Ad Context: {ad_description} ({ad_type})")
-        print(f"LLaVA Vision Model: {'Enabled' if LLAVA_ENABLED else 'Disabled'}")
 
     while True:
         ret, frame = cap.read()
@@ -768,17 +646,10 @@ def process_video_file(
         if not deepface_results or "region" not in deepface_results:
             face_data = detect_face_opencv(frame)
 
-        # Use LLaVA for additional analysis (periodically to save time)
-        llava_analysis = "LLaVA analysis not available"
-        if (face_data is not None or (deepface_results and "region" in deepface_results)) and llava_counter % llava_interval == 0:
-            # Only use LLaVA if a face was detected and on the right interval
-            llava_analysis = analyze_image_with_llava(frame, ad_context)
-        llava_counter += 1
-
         # Calculate metrics if face detected
         if deepface_results or face_data:
             calculated_metrics = calculate_metrics_from_deepface(deepface_results, ad_context)
-            user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, deepface_results,
+            user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, deepface_results, ad_context)
 
             # Create a row for the dataframe
             row = {
@@ -787,8 +658,7 @@ def process_video_file(
                 **calculated_metrics,
                 **ad_context,
                 'user_state': user_state,
-                'enhanced_user_state': enhanced_state
-                'llava_analysis': llava_analysis
+                'enhanced_user_state': enhanced_state
             }
             metrics_data.append(row)
 
@@ -835,9 +705,8 @@ def process_webcam_frame(
     ad_context: Dict[str, Any],
     metrics_data: pd.DataFrame,
     frame_count: int,
-    start_time: float
-
-) -> Tuple[np.ndarray, Dict[str, float], str, str, pd.DataFrame, int]:
+    start_time: float
+) -> Tuple[np.ndarray, Dict[str, float], str, pd.DataFrame]:
     """
     Process a single webcam frame
 
@@ -847,13 +716,12 @@
         metrics_data: DataFrame to accumulate metrics
         frame_count: Current frame count
         start_time: Start time of the session
-        llava_counter: Counter to limit LLaVA calls
 
     Returns:
-        Tuple of (annotated_frame, metrics_dict, enhanced_state,
+        Tuple of (annotated_frame, metrics_dict, enhanced_state, updated_metrics_df)
     """
     if frame is None:
-        return None, None, None,
+        return None, None, None, metrics_data
 
     # Analyze with DeepFace
     deepface_results = analyze_face_with_deepface(frame)
@@ -863,19 +731,10 @@
     if not deepface_results or "region" not in deepface_results:
         face_data = detect_face_opencv(frame)
 
-    # Use LLaVA for periodic analysis (it's slow)
-    llava_analysis = "LLaVA analysis not available"
-    llava_interval = 30  # Run LLaVA every X frames
-
-    if (face_data is not None or (deepface_results and "region" in deepface_results)) and llava_counter % llava_interval == 0:
-        # Only use LLaVA if a face was detected and on the right interval
-        llava_analysis = analyze_image_with_llava(frame, ad_context)
-    llava_counter += 1
-
     # Calculate metrics if face detected
     if deepface_results or face_data:
         calculated_metrics = calculate_metrics_from_deepface(deepface_results, ad_context)
-        user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, deepface_results,
+        user_state, enhanced_state = interpret_metrics_with_gemini(calculated_metrics, deepface_results, ad_context)
 
         # Create a row for the dataframe
         current_time = time.time()
@@ -885,8 +744,7 @@
             **calculated_metrics,
             **ad_context,
             'user_state': user_state,
-            'enhanced_user_state': enhanced_state
-            'llava_analysis': llava_analysis
+            'enhanced_user_state': enhanced_state
         }
 
         # Add row to DataFrame
@@ -896,13 +754,13 @@
         # Annotate the frame
         annotated_frame = annotate_frame(frame, face_data, deepface_results, calculated_metrics, enhanced_state)
 
-        return annotated_frame, calculated_metrics, enhanced_state,
+        return annotated_frame, calculated_metrics, enhanced_state, metrics_data
     else:
         # No face detected
         no_face_frame = frame.copy()
         cv2.putText(no_face_frame, "No face detected", (30, 30),
                     cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-        return no_face_frame, None, "No face detected",
+        return no_face_frame, None, "No face detected", metrics_data
 
 def start_webcam_session(
     ad_description: str = "",
@@ -950,8 +808,7 @@
         "last_saved": 0,
         "record_video": record_video,
         "recorded_frames": [] if record_video else None,
-        "timestamps": [] if record_video else None
-        "llava_counter": 0  # Counter to limit LLaVA calls
+        "timestamps": [] if record_video else None
     }
 
     return session
@@ -959,7 +816,7 @@
 def update_webcam_session(
     session: Dict[str, Any],
     frame: np.ndarray
-) -> Tuple[np.ndarray, Dict[str, float], str,
+) -> Tuple[np.ndarray, Dict[str, float], str, Dict[str, Any]]:
     """
     Update webcam session with a new frame
 
@@ -968,22 +825,20 @@
         frame: New frame from webcam
 
     Returns:
-        Tuple of (annotated_frame, metrics_dict, enhanced_state,
+        Tuple of (annotated_frame, metrics_dict, enhanced_state, updated_session)
     """
     # Process the frame
-    annotated_frame, metrics, enhanced_state,
+    annotated_frame, metrics, enhanced_state, updated_df = process_webcam_frame(
         frame,
         session["ad_context"],
         session["metrics_data"],
         session["frame_count"],
-        session["start_time"]
-        session["llava_counter"]
+        session["start_time"]
     )
 
     # Update session
     session["frame_count"] += 1
     session["metrics_data"] = updated_df
-    session["llava_counter"] = updated_llava_counter
 
     # Record frame if enabled
     if session["record_video"] and annotated_frame is not None:
@@ -996,7 +851,7 @@
         updated_df.to_csv(session["csv_path"], index=False)
         session["last_saved"] = session["frame_count"]
 
-    return annotated_frame, metrics, enhanced_state,
+    return annotated_frame, metrics, enhanced_state, session
 
 def end_webcam_session(session: Dict[str, Any]) -> Tuple[str, str]:
     """
@@ -1053,7 +908,7 @@
 def create_api_interface():
     with gr.Blocks(title="Facial Analysis APIs") as iface:
         gr.Markdown(f"""
-        # Enhanced Facial Analysis APIs (
+        # Enhanced Facial Analysis APIs (DeepFace)
 
         This interface provides two API endpoints:
 
@@ -1061,8 +916,6 @@ def create_api_interface():
         2. **Webcam API**: Analyze live webcam feed in real-time
 
         Both APIs use DeepFace for emotion analysis and Google's Gemini API for enhanced interpretations.
-
-        **LLaVA Vision Model: {'✅ Enabled' if LLAVA_ENABLED else '❌ Disabled'}**
         """)
 
         with gr.Tab("Video File API"):
@@ -1181,9 +1034,6 @@
             with gr.Column():
                 enhanced_state_txt = gr.Textbox(label="Enhanced State Analysis", lines=3)
 
-            with gr.Row():
-                llava_analysis_txt = gr.Textbox(label="LLaVA Vision Analysis", lines=6)
-
             with gr.Row():
                 download_csv = gr.File(label="Download Session Data")
                 download_video = gr.Video(label="Recorded Session")
@@ -1208,18 +1058,18 @@
 
         def process_frame(frame, session):
             if session is None:
-                return frame, None, "No active session. Click 'Start Session' to begin.",
+                return frame, None, "No active session. Click 'Start Session' to begin.", session
 
             # Process the frame
-            annotated_frame, metrics, enhanced_state,
+            annotated_frame, metrics, enhanced_state, updated_session = update_webcam_session(session, frame)
 
             # Update the metrics plot if metrics available
             if metrics:
                 metrics_plot = update_metrics_visualization(metrics)
-                return annotated_frame, metrics_plot, enhanced_state,
+                return annotated_frame, metrics_plot, enhanced_state, updated_session
             else:
                 # Return the annotated frame (likely with "No face detected")
-                return annotated_frame, None, enhanced_state or "No metrics available",
+                return annotated_frame, None, enhanced_state or "No metrics available", updated_session
 
         def end_session(session):
             if session is None:
@@ -1245,7 +1095,7 @@
         webcam_input.stream(
             process_frame,
             inputs=[webcam_input, session_data],
-            outputs=[processed_output, metrics_plot, enhanced_state_txt,
+            outputs=[processed_output, metrics_plot, enhanced_state_txt, session_data]
         )
 
         end_session_btn.click(
@@ -1258,8 +1108,7 @@ def create_api_interface():
 
 # Entry point
 if __name__ == "__main__":
-    print("Starting Enhanced Facial Analysis API (
+    print("Starting Enhanced Facial Analysis API (DeepFace)...")
     print(f"Gemini API {'enabled' if GEMINI_ENABLED else 'disabled (using simulation)'}")
-    print(f"LLaVA Vision Model {'enabled' if LLAVA_ENABLED else 'disabled (using DeepFace only)'}")
     iface = create_api_interface()
     iface.launch(debug=True)