Spaces:

clockclock
/

Interactive-Gaze-Analysis

Sleeping

App Files Files Community

clockclock committed on Jun 18

Commit

13605e4

verified ·

1 Parent(s): 25e6b09

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -47

app.py CHANGED Viewed

@@ -54,7 +54,6 @@ class EnhancedAIvsRealGazeAnalyzer:
         self.combined_data = pd.concat(all_dfs, ignore_index=True)
         self.combined_data.columns = self.combined_data.columns.str.strip()
-        # Dynamically find participant ID columns
         self.et_id_col = next((c for c in self.combined_data.columns if 'participant' in c.lower()), 'Participant name')
         resp_id_col = next((c for c in self.response_data.columns if 'participant' in c.lower()), 'Participant name')
@@ -74,15 +73,17 @@ class EnhancedAIvsRealGazeAnalyzer:
         self.numeric_cols = self.combined_data.select_dtypes(include=np.number).columns.tolist()
         self.time_metrics = [c for c in self.numeric_cols if any(k in c.lower() for k in ['time', 'duration', 'fixation'])]
-        self.participant_list = sorted(self.combined_data[self.et_id_col].unique().tolist())
-        # Pre-calculate group means for the explorer tab
         self.group_means = self.combined_data.groupby('Answer_Correctness')[self.numeric_cols].mean()
         print("Data loading complete.")
         return self
     def analyze_rq1_metric(self, metric):
-        """Analyzes a single metric for RQ1."""
         if not metric: return None, "Metric not found."
         correct = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Correct', metric].dropna()
         incorrect = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Incorrect', metric].dropna()
@@ -92,7 +93,6 @@ class EnhancedAIvsRealGazeAnalyzer:
         return fig, summary
     def run_prediction_model(self, test_size, n_estimators):
-        """Trains and evaluates the RandomForest model for RQ2."""
         leaky_features = ['Total_Correct', 'Overall_Accuracy', 'Correct', self.et_id_col]
         self.feature_names = [col for col in self.numeric_cols if col not in leaky_features and col in self.combined_data.columns]
         features = self.combined_data[self.feature_names].copy()
@@ -113,41 +113,29 @@ class EnhancedAIvsRealGazeAnalyzer:
         return summary_md, report_df, fig
     def analyze_individual_trial(self, participant, question):
-        """Generates a detailed report for a single participant-question trial."""
         if not participant or not question:
             return "Please select a participant and a question.", None, None
-        trial_data = self.combined_data[(self.combined_data[self.et_id_col] == participant) & (self.combined_data['Question'] == question)]
         if trial_data.empty:
             return f"No data found for {participant} on {question}.", None, None
         trial_data = trial_data.iloc[0]
         actual_answer = trial_data['Answer_Correctness']
-        # Model Prediction for this specific trial
         trial_features = trial_data[self.feature_names].values.reshape(1, -1)
         trial_features_scaled = self.scaler.transform(trial_features)
         prediction_prob = self.model.predict_proba(trial_features_scaled)[0]
         predicted_answer = "Correct" if prediction_prob[1] > 0.5 else "Incorrect"
-        # Summary Text
-        summary_md = f"""
-        ### Trial Breakdown: **{participant}** on **{question}**
-        - **Actual Answer:** `{actual_answer}`
-        - **Model Prediction:** `{predicted_answer}` (Confidence: {max(prediction_prob)*100:.1f}%)
-        """
-        # A vs B Gaze Bias Plot
         aoi_cols = [c for c in self.feature_names if ' A' in c or ' B' in c]
         a_cols = sorted([c for c in aoi_cols if ' A' in c])
         b_cols = sorted([c for c in aoi_cols if ' B' in c])
         plot_data = []
         for a_col, b_col in zip(a_cols, b_cols):
             base_name = a_col.replace(' A', '')
             plot_data.append({'AOI': base_name, 'Image': 'A', 'Value': trial_data[a_col]})
             plot_data.append({'AOI': base_name, 'Image': 'B', 'Value': trial_data[b_col]})
         fig, ax = plt.subplots(figsize=(10, 6))
         if plot_data:
             sns.barplot(data=pd.DataFrame(plot_data), x='Value', y='AOI', hue='Image', ax=ax, palette={'A': '#66b3ff', 'B': '#ff9999'})
@@ -155,26 +143,16 @@ class EnhancedAIvsRealGazeAnalyzer:
         else:
             ax.text(0.5, 0.5, 'No A/B Area of Interest data for this question.', ha='center')
         plt.tight_layout()
-        # Feature Report Card
         top_features = self.model.feature_importances_.argsort()[-5:][::-1]
         top_feature_names = [self.feature_names[i] for i in top_features]
         report_card_data = []
         for feature in top_feature_names:
-            report_card_data.append({
-                'Top Feature': feature,
-                'This Trial Value': f"{trial_data[feature]:.2f}",
-                'Avg (Correct)': f"{self.group_means.loc['Correct', feature]:.2f}",
-                'Avg (Incorrect)': f"{self.group_means.loc['Incorrect', feature]:.2f}"
-            })
         report_card_df = pd.DataFrame(report_card_data)
         return summary_md, fig, report_card_df
-# --- DATA SETUP (RUNS ONCE AT STARTUP) ---
 def setup_and_load_data():
-    """Clones the repo if not present and loads data."""
     repo_url = "https://github.com/RextonRZ/GenAIEyeTrackingCleanedDataset"
     repo_dir = "GenAIEyeTrackingCleanedDataset"
     if not os.path.exists(repo_dir):
@@ -209,9 +187,7 @@ def update_explorer_view(participant, question):
 # --- GRADIO INTERFACE DEFINITION ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Interactive Dashboard: AI vs. Real Gaze Analysis\nExplore the eye-tracking dataset by interacting with the controls below. The data is automatically loaded from the public GitHub repository.")
     with gr.Tabs():
-        # --- TAB 1: RQ1 ---
         with gr.TabItem("📊 RQ1: Viewing Time vs. Correctness"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -219,8 +195,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     rq1_summary_output = gr.Markdown(label="Statistical Summary")
                 with gr.Column(scale=2):
                     rq1_plot_output = gr.Plot(label="Metric Comparison")
-        # --- TAB 2: RQ2 ---
         with gr.TabItem("🤖 RQ2: Predicting Correctness from Gaze"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -231,8 +205,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     rq2_summary_output = gr.Markdown(label="Model Performance Summary")
                     rq2_table_output = gr.Dataframe(label="Classification Report", interactive=False)
                     rq2_plot_output = gr.Plot(label="Feature Importance")
-        # --- TAB 3: INNOVATIVE EXPLORER ---
         with gr.TabItem("🔬 Individual Trial Explorer"):
             gr.Markdown("### Deep Dive into a Single Trial\nSelect a participant and a question to see a detailed breakdown of their gaze behavior.")
             with gr.Row():
@@ -244,22 +216,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 with gr.Column(scale=2):
                     explorer_plot = gr.Plot(label="Gaze Bias (Image A vs. B)")
-    # --- WIRING FOR ALL TABS ---
     outputs_rq2 = [rq2_summary_output, rq2_table_output, rq2_plot_output]
     outputs_explorer = [explorer_summary, explorer_plot, explorer_report_card]
-    # Wiring for Tab 1
     rq1_metric_dropdown.change(fn=update_rq1_visuals, inputs=[rq1_metric_dropdown], outputs=[rq1_plot_output, rq1_summary_output])
-    # Wiring for Tab 2
     rq2_test_size_slider.release(fn=update_rq2_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)
     rq2_estimators_slider.release(fn=update_rq2_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)
-    # Wiring for Tab 3
     explorer_participant.change(fn=update_explorer_view, inputs=[explorer_participant, explorer_question], outputs=outputs_explorer)
     explorer_question.change(fn=update_explorer_view, inputs=[explorer_participant, explorer_question], outputs=outputs_explorer)
-    # Load initial state for all tabs when the app starts
     demo.load(fn=update_rq1_visuals, inputs=[rq1_metric_dropdown], outputs=[rq1_plot_output, rq1_summary_output])
     demo.load(fn=update_rq2_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)

         self.combined_data = pd.concat(all_dfs, ignore_index=True)
         self.combined_data.columns = self.combined_data.columns.str.strip()
         self.et_id_col = next((c for c in self.combined_data.columns if 'participant' in c.lower()), 'Participant name')
         resp_id_col = next((c for c in self.response_data.columns if 'participant' in c.lower()), 'Participant name')
         self.numeric_cols = self.combined_data.select_dtypes(include=np.number).columns.tolist()
         self.time_metrics = [c for c in self.numeric_cols if any(k in c.lower() for k in ['time', 'duration', 'fixation'])]
+        # --- THIS IS THE CORRECTED LINE ---
+        # Convert all participant IDs to strings before sorting to handle mixed types.
+        self.participant_list = sorted([str(p) for p in self.combined_data[self.et_id_col].unique()])
+        # --- END OF CORRECTION ---
         self.group_means = self.combined_data.groupby('Answer_Correctness')[self.numeric_cols].mean()
         print("Data loading complete.")
         return self
     def analyze_rq1_metric(self, metric):
         if not metric: return None, "Metric not found."
         correct = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Correct', metric].dropna()
         incorrect = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Incorrect', metric].dropna()
         return fig, summary
     def run_prediction_model(self, test_size, n_estimators):
         leaky_features = ['Total_Correct', 'Overall_Accuracy', 'Correct', self.et_id_col]
         self.feature_names = [col for col in self.numeric_cols if col not in leaky_features and col in self.combined_data.columns]
         features = self.combined_data[self.feature_names].copy()
         return summary_md, report_df, fig
     def analyze_individual_trial(self, participant, question):
         if not participant or not question:
             return "Please select a participant and a question.", None, None
+        # Convert participant ID to string for matching, as the list is now all strings
+        trial_data = self.combined_data[(self.combined_data[self.et_id_col].astype(str) == str(participant)) & (self.combined_data['Question'] == question)]
         if trial_data.empty:
             return f"No data found for {participant} on {question}.", None, None
         trial_data = trial_data.iloc[0]
         actual_answer = trial_data['Answer_Correctness']
         trial_features = trial_data[self.feature_names].values.reshape(1, -1)
         trial_features_scaled = self.scaler.transform(trial_features)
         prediction_prob = self.model.predict_proba(trial_features_scaled)[0]
         predicted_answer = "Correct" if prediction_prob[1] > 0.5 else "Incorrect"
+        summary_md = f"""### Trial Breakdown: **{participant}** on **{question}**\n- **Actual Answer:** `{actual_answer}`\n- **Model Prediction:** `{predicted_answer}` (Confidence: {max(prediction_prob)*100:.1f}%)"""
         aoi_cols = [c for c in self.feature_names if ' A' in c or ' B' in c]
         a_cols = sorted([c for c in aoi_cols if ' A' in c])
         b_cols = sorted([c for c in aoi_cols if ' B' in c])
         plot_data = []
         for a_col, b_col in zip(a_cols, b_cols):
             base_name = a_col.replace(' A', '')
             plot_data.append({'AOI': base_name, 'Image': 'A', 'Value': trial_data[a_col]})
             plot_data.append({'AOI': base_name, 'Image': 'B', 'Value': trial_data[b_col]})
         fig, ax = plt.subplots(figsize=(10, 6))
         if plot_data:
             sns.barplot(data=pd.DataFrame(plot_data), x='Value', y='AOI', hue='Image', ax=ax, palette={'A': '#66b3ff', 'B': '#ff9999'})
         else:
             ax.text(0.5, 0.5, 'No A/B Area of Interest data for this question.', ha='center')
         plt.tight_layout()
         top_features = self.model.feature_importances_.argsort()[-5:][::-1]
         top_feature_names = [self.feature_names[i] for i in top_features]
         report_card_data = []
         for feature in top_feature_names:
+            report_card_data.append({'Top Feature': feature, 'This Trial Value': f"{trial_data[feature]:.2f}", 'Avg (Correct)': f"{self.group_means.loc['Correct', feature]:.2f}", 'Avg (Incorrect)': f"{self.group_means.loc['Incorrect', feature]:.2f}"})
         report_card_df = pd.DataFrame(report_card_data)
         return summary_md, fig, report_card_df
+# --- DATA SETUP ---
 def setup_and_load_data():
     repo_url = "https://github.com/RextonRZ/GenAIEyeTrackingCleanedDataset"
     repo_dir = "GenAIEyeTrackingCleanedDataset"
     if not os.path.exists(repo_dir):
 # --- GRADIO INTERFACE DEFINITION ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Interactive Dashboard: AI vs. Real Gaze Analysis\nExplore the eye-tracking dataset by interacting with the controls below. The data is automatically loaded from the public GitHub repository.")
     with gr.Tabs():
         with gr.TabItem("📊 RQ1: Viewing Time vs. Correctness"):
             with gr.Row():
                 with gr.Column(scale=1):
                     rq1_summary_output = gr.Markdown(label="Statistical Summary")
                 with gr.Column(scale=2):
                     rq1_plot_output = gr.Plot(label="Metric Comparison")
         with gr.TabItem("🤖 RQ2: Predicting Correctness from Gaze"):
             with gr.Row():
                 with gr.Column(scale=1):
                     rq2_summary_output = gr.Markdown(label="Model Performance Summary")
                     rq2_table_output = gr.Dataframe(label="Classification Report", interactive=False)
                     rq2_plot_output = gr.Plot(label="Feature Importance")
         with gr.TabItem("🔬 Individual Trial Explorer"):
             gr.Markdown("### Deep Dive into a Single Trial\nSelect a participant and a question to see a detailed breakdown of their gaze behavior.")
             with gr.Row():
                 with gr.Column(scale=2):
                     explorer_plot = gr.Plot(label="Gaze Bias (Image A vs. B)")
     outputs_rq2 = [rq2_summary_output, rq2_table_output, rq2_plot_output]
     outputs_explorer = [explorer_summary, explorer_plot, explorer_report_card]
     rq1_metric_dropdown.change(fn=update_rq1_visuals, inputs=[rq1_metric_dropdown], outputs=[rq1_plot_output, rq1_summary_output])
     rq2_test_size_slider.release(fn=update_rq2_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)
     rq2_estimators_slider.release(fn=update_rq2_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)
     explorer_participant.change(fn=update_explorer_view, inputs=[explorer_participant, explorer_question], outputs=outputs_explorer)
     explorer_question.change(fn=update_explorer_view, inputs=[explorer_participant, explorer_question], outputs=outputs_explorer)
     demo.load(fn=update_rq1_visuals, inputs=[rq1_metric_dropdown], outputs=[rq1_plot_output, rq1_summary_output])
     demo.load(fn=update_rq2_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)