clockclock committed on
Commit
9b9f42f
·
verified ·
1 Parent(s): eadab79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -122
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
@@ -25,7 +25,8 @@ class EnhancedAIvsRealGazeAnalyzer:
25
  self.correct_answers = {'Pair1': 'B', 'Pair2': 'B', 'Pair3': 'B', 'Pair4': 'B', 'Pair5': 'B', 'Pair6': 'B'}
26
  self.combined_data = None
27
  self.fixation_data = {}
28
- self.participant_list = []
 
29
  self.model = None
30
  self.scaler = None
31
  self.feature_names = []
@@ -34,7 +35,7 @@ class EnhancedAIvsRealGazeAnalyzer:
34
  participant_col = next((c for c in df.columns if 'participant' in str(c).lower()), None)
35
  if not participant_col:
36
  raise ValueError(f"Could not find a 'participant' column in the file: {filename}")
37
- df.rename(columns={participant_col: 'participant_id'}, inplace=True)
38
  df['participant_id'] = df['participant_id'].astype(str)
39
  return df
40
 
@@ -52,48 +53,9 @@ class EnhancedAIvsRealGazeAnalyzer:
52
 
53
  all_metrics_dfs = []
54
  for q in self.questions:
55
- file_path = f"{base_path summary_text, fig, gr.Slider(maximum=slider_max, value=fixation_num, interactive=True)
56
-
57
- def analyze_rq1_metric(self, metric):
58
- if not metric or metric not in self.combined_data.columns: return None, "Metric not found."
59
- correct = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Correct', metric].dropna()
60
- incorrect = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Incorrect', metric].dropna()
61
- if len(correct) < 2 or len(incorrect) < 2: return None, "Not enough data for both groups to compare."
62
- t_stat, p_val = stats.ttest_ind(incorrect, correct, equal_var=False, nan_policy='omit')
63
- fig, ax = plt.subplots(figsize=(8, 6)); sns.boxplot(data=self.combined_data, x='Answer_Correctness', y=metric, ax=ax, palette=['#66b3ff','#ff9999']); ax.set_title(f'Comparison of "{metric}" by Answer Correctness', fontsize=14); ax.set_xlabel("Answer Correctness"); ax.set_ylabel(metric); plt.tight_layout()
64
- summary = f"""### Analysis for: **{metric}**\n- **Mean (Correct Answers):** {correct.mean():.4f}\n- **Mean (Incorrect Answers):** {incorrect.mean():.4f}\n- **T-test p-value:** {p_val:.4f}\n\n**Conclusion:**\n- {'There is a **statistically significant** difference (p < 0.05).' if p_val < 0.05 else 'There is **no statistically significant** difference (p >= 0.05).'}"""
65
- return fig, summary
66
-
67
- # --- DATA SETUP & GRADIO APP ---
68
- def setup_and_load_data():
69
- repo_url = "https://github.com/RextonRZ/GenAIEyeTrackingCleanedDataset"
70
- repo_dir = "GenAIEyeTrackingCleanedDataset"
71
- if not os.path.exists(repo_dir): git.Repo.clone_from(repo_url, repo_dir)
72
- else: print("Data repository already exists.")
73
- base_path = repo_dir
74
- response_file_path = os.path.join(repo_dir, "GenAI Response.xlsx")
75
- analyzer = EnhancedAIvsRealGazeAnalyzer().load_and_process_data(base_path, response_file_path)
76
- return analyzer
77
-
78
- analyzer = setup_and_load_data()
79
-
80
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
81
- gr.Markdown("# Interactive Dashboard: AI vs. Real Gaze Analysis")
82
- with gr.Tabs() as tabs:
83
- with gr.TabItem("📊 RQ1: Viewing Time vs. Correctness", id=0):
84
- # ... (UI is the same)
85
- with gr.TabItem("🤖 RQ2: Predicting Correctness from Gaze", id=1):
86
- with gr.Row():
87
- with gr.Column(scale=1):
88
- gr.Markdown("#### Tune Model Hyperparameters")
89
- rq2_test_size_slider=gr.Slider(minimum=0.1, maximum=0.5, step=0.05, value=0.3, label="Test Set Size")
90
- rq2_estimators_slider=gr.Slider(minimum=10, maximum=200, step=10, value=100, label="Number of Trees")
91
- rq2_status = gr.Markdown("Train a model to enable the Gaze Playback tab.")
92
- with gr.Column(scale=2):
93
- # ... (UI is the same)
94
- with gr.TabItem("👁️ Gaze Playback & Real-Time Prediction", id=2):
95
- }/Filtered_GenAI_Metrics_cleaned_{q}.xlsx"
96
  if os.path.exists(file_path):
 
97
  xls = pd.ExcelFile(file_path)
98
  metrics_df = pd.read_excel(xls, sheet_name=0)
99
  metrics_df = self._find_and_standardize_participant_col(metrics_df, f"{q} Metrics")
@@ -105,99 +67,53 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
105
  fix_df = pd.read_excel(xls, sheet_name=1)
106
  fix_df = self._find_and_standardize_participant_col(fix_df, f"{q} Fixations")
107
  fix_df.dropna(subset=['Fixation point X', 'Fixation point Y', 'Gaze event duration (ms)'], inplace=True)
108
- for participant, group in fix_df.groupby('participant_id'):
109
- self.fixation_data[(participant, q)] = group.reset_index(drop=True)
 
 
 
 
110
  except Exception as e:
111
  print(f" -> WARNING: Could not load fixation sheet for {q}. Error: {e}")
112
 
113
  if not all_metrics_dfs: raise ValueError("No aggregated metrics files were found.")
114
  self.combined_data = pd.concat(all_metrics_dfs, ignore_index=True)
115
- q_to_pair# ... (UI is the same)
116
-
117
- # The UI structure is identical to before, just add the new status component
118
- # This is a bit of a rewrite to use the ids for clarity.
119
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
120
- gr.Markdown("# Interactive Dashboard: AI vs. Real Gaze Analysis")
121
- with gr.Tabs() as tabs:
122
- with gr.TabItem("📊 RQ1: Viewing Time vs. Correctness", id=0):
123
- with gr.Row():
124
- with gr.Column(scale=1):
125
- rq1_metric_dropdown = gr.Dropdown(choices=analyzer.time_metrics = {f'Q{i+1}': f'Pair{i+1}' for i in range(6)}
126
  self.combined_data['Pair'] = self.combined_data['Question'].map(q_to_pair)
127
  self.combined_data = self.combined_data.merge(response_long, on=['participant_id', 'Pair'], how='left')
128
- self.combined_data['Answer_Correctness, label="Select a Time-Based Metric", value=analyzer.time_metrics[0] if analyzer.time_metrics else None)
129
- rq1_summary_output = gr.Markdown(label="Statistical Summary")
130
- with gr.Column(scale=2):
131
- rq1_plot_output = gr.Plot(label="Metric Comparison")
132
- with gr.TabItem("🤖 RQ2: Predicting Correctness from Gaze", id=1):
133
- with gr.Row():
134
- with gr.Column(scale=1):
135
- gr'] = self.combined_data['Correct'].map({True: 'Correct', False: 'Incorrect'})
136
- .Markdown("#### Tune Model Hyperparameters")
137
- rq2_test_size_slider = gr.Slider(minimum=0.1, maximum=0.5, step=0.05, value=0.3
138
- self.numeric_cols = self.combined_data.select_dtypes(include=np.number).columns.tolist()
139
- self.time_metrics = [c for c in self.numeric_cols if any, label="Test Set Size")
140
- rq2_estimators_slider = gr.Slider(minimum=10(k in c.lower() for k in ['time', 'duration', 'fixation'])]
141
 
142
- , maximum=200, step=10, value=100, label="Number of Trees")# KEY FIX: Participant list is now derived ONLY from trials with valid fixation data.
143
- self.participant_list
144
- rq2_status = gr.Markdown("Train a model to enable the Gaze Playback tab.")
145
- = sorted(list(set([key[0] for key in self.fixation_data.keys()]))) with gr.Column(scale=2):
146
- rq2_summary_output = gr.Markdown(label
147
- print(f"--- Data Loading Successful. Found {len(self.participant_list)} participants with fixation data.="Model Performance Summary")
148
- rq2_table_output = gr.Dataframe(label="Classification Report", ---")
149
  return self
150
 
151
- def run_prediction_model(self, test_size, n_estimators interactive=False)
152
- rq2_plot_output = gr.Plot(label="Feature Importance")
153
- ):
154
- leaky_features = ['Correct', 'participant_id']
155
- self.feature_names = [with gr.TabItem("👁️ Gaze Playback & Real-Time Prediction", id=2):
156
- col for col in self.combined_data.select_dtypes(include=np.number).columns if col not in leaky_with gr.Row():
157
- with gr.Column(scale=1):
158
- gr.Markdown("### See the Prediction Efeatures]
159
  features = self.combined_data[self.feature_names].copy()
160
- target = self.combinedvolve with Every Glance!")
161
- playback_participant = gr.Dropdown(choices=analyzer.participant_list, label_data['Answer_Correctness'].map({'Correct': 1, 'Incorrect': 0})
162
- valid="Select Participant")
163
- playback_question = gr.Dropdown(choices=analyzer.questions, label="Select Question_indices = target.notna()
164
- features, target = features[valid_indices], target[valid_")
165
- gr.Markdown("Use the slider to play back fixations one by one.")
166
- playback_sliderindices]
167
  features = features.fillna(features.median()).fillna(0)
168
- if len(target = gr.Slider(minimum=0, maximum=1, step=1, value=0, label="Fix.unique()) < 2: return "Not enough data to train.", None, None
169
- X_train, X_testation Number", interactive=False)
170
- playback_summary = gr.Markdown(label="Trial Info")
171
- with gr.Column(scale=2):
172
- playback_plot = gr.Plot(label="Gaze Play, y_train, y_test = train_test_split(features, target, test_size=test_size, random_state=42, stratify=target)
173
- self.scaler = StandardScaler().fitback & Live Prediction")
174
-
175
- outputs_rq2 = [rq2_summary_output, rq2_table_output, rq2_plot_output, rq2_status]
176
- outputs_playback = [playback_summary, playback(X_train)
177
  self.model = RandomForestClassifier(n_estimators=int(n_estimators), random_state=42, class_weight='balanced').fit(self.scaler.transform(X_train), y_train)
178
- _plot, playback_slider]
179
- rq1_metric_dropdown.change(fn=analyzer.analyze_rq1_metric, inputs=rq1_metric_dropdown, outputs=[rq1_plot_output, rq report = classification_report(y_test, self.model.predict(self.scaler.transform(X_test)), target_names=['Incorrect', 'Correct'], output_dict=True)
180
- auc_score =1_summary_output])
181
- rq2_test_size_slider.release(fn=analyzer.run_prediction_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs roc_auc_score(y_test, self.model.predict_proba(self.scaler.transform(X_test))[:, 1])
182
- summary_md = f"### Model Performance\n- **AUC_rq2)
183
- rq2_estimators_slider.release(fn=analyzer.run_prediction_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq Score:** **{auc_score:.4f}**\n- **Overall Accuracy:** {report['accuracy']:.3f}"
184
  report_df = pd.DataFrame(report).transpose().round(3)
185
- feature_importance = pd.DataFrame({'Feature': self.feature_names, 'Importance': self.model.feature2)
186
- playback_inputs = [playback_participant, playback_question, playback_slider]
187
- playback_participant.change(lambda: 0, None, playback_slider).then(fn=analyzer.generate_gaze_playback, inputs=playback_inputs, outputs=outputs_playback)
188
- playback_question.change(lambda_importances_}).sort_values('Importance', ascending=False).head(15)
189
- fig, ax = plt.subplots(figsize=(10, 8)); sns.barplot(data=feature_importance, x='Importance', y='Feature', ax=ax, palette='viridis'); ax.set_title(f': 0, None, playback_slider).then(fn=analyzer.generate_gaze_playback, inputs=playback_inputs, outputs=outputs_playback)
190
- playback_slider.release(fn=analyzer.generate_gaze_playback, inputs=playback_inputs, outputs=outputs_playback)
191
-
192
- demo.load(Top 15 Predictive Features', fontsize=14); plt.tight_layout()
193
- return summary_md, report_df, fig
194
-
195
- def _recalculate_features_from_fixations(self, fixations_df):fn=analyzer.analyze_rq1_metric, inputs=rq1_metric_dropdown, outputs=[rq1_plot_output, rq1_summary_output])
196
- demo.load(fn=analyzer.run_prediction
197
  feature_vector = pd.Series(0.0, index=self.feature_names)
198
  if fixations_df.empty: return feature_vector.fillna(0).values.reshape(1, -1)
199
  if 'AOI name' in fixations_df.columns:
200
- for aoi_name,_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs group in fixations_df.groupby('AOI name'):
201
  col_name = f'Total fixation duration on {aoi_name}'
202
  if col_name in feature_vector.index:
203
  feature_vector[col_name] = group['Gaze event duration (ms)'].sum()
@@ -205,9 +121,10 @@ _plot, playback_slider]
205
  return feature_vector.fillna(0).values.reshape(1, -1)
206
 
207
  def generate_gaze_playback(self, participant, question, fixation_num):
 
208
  trial_key = (str(participant), question)
209
  if not participant or not question or trial_key not in self.fixation_data:
210
- return "**No fixation data found for this trial.**", None, gr.Slider(interactive=False, value=0)
211
 
212
  all_fixations = self.fixation_data[trial_key]
213
  fixation_num = int(fixation_num)
@@ -220,7 +137,111 @@ _plot, playback_slider]
220
  prob_correct = prediction_prob[1]
221
 
222
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), gridspec_kw={'height_ratios': [4, 1]})
223
- fig.suptitle_rq2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
  if __name__ == "__main__":
226
  demo.launch()
 
1
+ # app.py (Final, Robust, and Workable Version with Chained Dropdowns)
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
 
25
  self.correct_answers = {'Pair1': 'B', 'Pair2': 'B', 'Pair3': 'B', 'Pair4': 'B', 'Pair5': 'B', 'Pair6': 'B'}
26
  self.combined_data = None
27
  self.fixation_data = {}
28
+ self.valid_playback_participants = []
29
+ self.valid_playback_trials = {}
30
  self.model = None
31
  self.scaler = None
32
  self.feature_names = []
 
35
  participant_col = next((c for c in df.columns if 'participant' in str(c).lower()), None)
36
  if not participant_col:
37
  raise ValueError(f"Could not find a 'participant' column in the file: {filename}")
38
+ df = df.rename(columns={participant_col: 'participant_id'})
39
  df['participant_id'] = df['participant_id'].astype(str)
40
  return df
41
 
 
53
 
54
  all_metrics_dfs = []
55
  for q in self.questions:
56
+ file_path = f"{base_path}/Filtered_GenAI_Metrics_cleaned_{q}.xlsx"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  if os.path.exists(file_path):
58
+ print(f"Processing {file_path}...")
59
  xls = pd.ExcelFile(file_path)
60
  metrics_df = pd.read_excel(xls, sheet_name=0)
61
  metrics_df = self._find_and_standardize_participant_col(metrics_df, f"{q} Metrics")
 
67
  fix_df = pd.read_excel(xls, sheet_name=1)
68
  fix_df = self._find_and_standardize_participant_col(fix_df, f"{q} Fixations")
69
  fix_df.dropna(subset=['Fixation point X', 'Fixation point Y', 'Gaze event duration (ms)'], inplace=True)
70
+ for participant_id, group in fix_df.groupby('participant_id'):
71
+ self.fixation_data[(participant_id, q)] = group.reset_index(drop=True)
72
+ if participant_id not in self.valid_playback_trials:
73
+ self.valid_playback_trials[participant_id] = []
74
+ self.valid_playback_trials[participant_id].append(q)
75
+ print(f" -> Successfully loaded {len(fix_df)} fixations for {q}.")
76
  except Exception as e:
77
  print(f" -> WARNING: Could not load fixation sheet for {q}. Error: {e}")
78
 
79
  if not all_metrics_dfs: raise ValueError("No aggregated metrics files were found.")
80
  self.combined_data = pd.concat(all_metrics_dfs, ignore_index=True)
81
+ q_to_pair = {f'Q{i+1}': f'Pair{i+1}' for i in range(6)}
 
 
 
 
 
 
 
 
 
 
82
  self.combined_data['Pair'] = self.combined_data['Question'].map(q_to_pair)
83
  self.combined_data = self.combined_data.merge(response_long, on=['participant_id', 'Pair'], how='left')
84
+ self.combined_data['Answer_Correctness'] = self.combined_data['Correct'].map({True: 'Correct', False: 'Incorrect'})
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
+ self.numeric_cols = self.combined_data.select_dtypes(include=np.number).columns.tolist()
87
+ self.time_metrics = [c for c in self.numeric_cols if any(k in c.lower() for k in ['time', 'duration', 'fixation'])]
88
+ self.valid_playback_participants = sorted(list(self.valid_playback_trials.keys()))
89
+ print(f"--- Data Loading Successful. Found {len(self.valid_playback_participants)} participants with fixation data. ---")
 
 
 
90
  return self
91
 
92
+ def run_prediction_model(self, test_size, n_estimators):
93
+ leaky_features = ['participant_id', 'Correct', 'Total_Correct', 'Overall_Accuracy']
94
+ self.feature_names = [col for col in self.numeric_cols if col not in leaky_features]
 
 
 
 
 
95
  features = self.combined_data[self.feature_names].copy()
96
+ target = self.combined_data['Answer_Correctness'].map({'Correct': 1, 'Incorrect': 0})
97
+ valid_indices = target.notna()
98
+ features, target = features[valid_indices], target[valid_indices]
 
 
 
 
99
  features = features.fillna(features.median()).fillna(0)
100
+ if len(target.unique()) < 2: return "Not enough data to train.", None, None, gr.Markdown("Model not trained yet.")
101
+ X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=test_size, random_state=42, stratify=target)
102
+ self.scaler = StandardScaler().fit(X_train)
 
 
 
 
 
 
103
  self.model = RandomForestClassifier(n_estimators=int(n_estimators), random_state=42, class_weight='balanced').fit(self.scaler.transform(X_train), y_train)
104
+ report = classification_report(y_test, self.model.predict(self.scaler.transform(X_test)), target_names=['Incorrect', 'Correct'], output_dict=True)
105
+ auc_score = roc_auc_score(y_test, self.model.predict_proba(self.scaler.transform(X_test))[:, 1])
106
+ summary_md = f"### Model Performance\n- **AUC Score:** **{auc_score:.4f}**\n- **Overall Accuracy:** {report['accuracy']:.3f}"
 
 
 
107
  report_df = pd.DataFrame(report).transpose().round(3)
108
+ feature_importance = pd.DataFrame({'Feature': self.feature_names, 'Importance': self.model.feature_importances_}).sort_values('Importance', ascending=False).head(15)
109
+ fig, ax = plt.subplots(figsize=(10, 8)); sns.barplot(data=feature_importance, x='Importance', y='Feature', ax=ax, palette='viridis'); ax.set_title(f'Top 15 Predictive Features (n_estimators={int(n_estimators)})', fontsize=14); plt.tight_layout()
110
+ return summary_md, report_df, fig, gr.Markdown("✅ **Model is ready!** You can now use the Gaze Playback tab.")
111
+
112
+ def _recalculate_features_from_fixations(self, fixations_df):
 
 
 
 
 
 
 
113
  feature_vector = pd.Series(0.0, index=self.feature_names)
114
  if fixations_df.empty: return feature_vector.fillna(0).values.reshape(1, -1)
115
  if 'AOI name' in fixations_df.columns:
116
+ for aoi_name, group in fixations_df.groupby('AOI name'):
117
  col_name = f'Total fixation duration on {aoi_name}'
118
  if col_name in feature_vector.index:
119
  feature_vector[col_name] = group['Gaze event duration (ms)'].sum()
 
121
  return feature_vector.fillna(0).values.reshape(1, -1)
122
 
123
  def generate_gaze_playback(self, participant, question, fixation_num):
124
+ if self.model is None: return "Please train a model in Tab 2 first.", None, gr.Slider(interactive=False)
125
  trial_key = (str(participant), question)
126
  if not participant or not question or trial_key not in self.fixation_data:
127
+ return "Please select a valid trial.", None, gr.Slider(interactive=False, value=0)
128
 
129
  all_fixations = self.fixation_data[trial_key]
130
  fixation_num = int(fixation_num)
 
137
  prob_correct = prediction_prob[1]
138
 
139
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), gridspec_kw={'height_ratios': [4, 1]})
140
+ fig.suptitle(f"Gaze Playback for {participant} - {question}", fontsize=16, weight='bold')
141
+ ax1.set_title(f"Displaying Fixations 1 through {fixation_num}/{slider_max}")
142
+ ax1.set_xlim(0, 1920); ax1.set_ylim(1080, 0)
143
+ ax1.set_aspect('equal'); ax1.tick_params(left=False, right=False, bottom=False, top=False, labelleft=False, labelbottom=False)
144
+ ax1.add_patch(patches.Rectangle((0, 0), 1920/2, 1080, facecolor='#e0e0e0'))
145
+ ax1.add_patch(patches.Rectangle((1920/2, 0), 1920/2, 1080, facecolor='#f0f0f0'))
146
+ ax1.text(1920*0.25, 50, "Image A", ha='center', fontsize=14, alpha=0.7)
147
+ ax1.text(1920*0.75, 50, "Image B", ha='center', fontsize=14, alpha=0.7)
148
+ if not current_fixations.empty:
149
+ points = current_fixations[['Fixation point X', 'Fixation point Y']]
150
+ ax1.plot(points['Fixation point X'], points['Fixation point Y'], marker='o', color='grey', alpha=0.5, linestyle='-')
151
+ ax1.scatter(points.iloc[-1]['Fixation point X'], points.iloc[-1]['Fixation point Y'], s=200, c='red', zorder=10, edgecolors='black', lw=2)
152
+
153
+ ax2.set_xlim(0, 1); ax2.set_yticks([])
154
+ ax2.set_title("Live Prediction Confidence (Answer is 'Correct')")
155
+ bar_color = 'green' if prob_correct > 0.5 else 'red'
156
+ ax2.barh([0], [prob_correct], color=bar_color, height=0.5, edgecolor='black')
157
+ ax2.axvline(0.5, color='black', linestyle='--', linewidth=1)
158
+ ax2.text(prob_correct, 0, f" {prob_correct:.1%} ", va='center', ha='left' if prob_correct < 0.9 else 'right', color='white', weight='bold', fontsize=12)
159
+ plt.tight_layout(rect=[0, 0, 1, 0.95])
160
+
161
+ trial_info = self.combined_data[(self.combined_data['participant_id'] == str(participant)) & (self.combined_data['Question'] == question)].iloc[0]
162
+ summary_text = f"**Actual Answer:** `{trial_info['Answer_Correctness']}`"
163
+
164
+ return summary_text, fig, gr.Slider(maximum=slider_max, value=fixation_num, interactive=True)
165
+
166
+ def analyze_rq1_metric(self, metric):
167
+ if not metric or metric not in self.combined_data.columns: return None, "Metric not found."
168
+ correct = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Correct', metric].dropna()
169
+ incorrect = self.combined_data.loc[self.combined_data['Answer_Correctness'] == 'Incorrect', metric].dropna()
170
+ if len(correct) < 2 or len(incorrect) < 2: return None, "Not enough data for both groups to compare."
171
+ t_stat, p_val = stats.ttest_ind(incorrect, correct, equal_var=False, nan_policy='omit')
172
+ fig, ax = plt.subplots(figsize=(8, 6)); sns.boxplot(data=self.combined_data, x='Answer_Correctness', y=metric, ax=ax, palette=['#66b3ff','#ff9999']); ax.set_title(f'Comparison of "{metric}" by Answer Correctness', fontsize=14); ax.set_xlabel("Answer Correctness"); ax.set_ylabel(metric); plt.tight_layout()
173
+ summary = f"""### Analysis for: **{metric}**\n- **Mean (Correct Answers):** {correct.mean():.4f}\n- **Mean (Incorrect Answers):** {incorrect.mean():.4f}\n- **T-test p-value:** {p_val:.4f}\n\n**Conclusion:**\n- {'There is a **statistically significant** difference (p < 0.05).' if p_val < 0.05 else 'There is **no statistically significant** difference (p >= 0.05).'}"""
174
+ return fig, summary
175
+
176
+ def update_question_dropdown(self, participant):
177
+ """Dynamically updates the question dropdown based on the selected participant."""
178
+ valid_questions = self.valid_playback_trials.get(participant, [])
179
+ return gr.Dropdown(choices=sorted(valid_questions), interactive=True, value=None)
180
+
181
+ # --- DATA SETUP & GRADIO APP ---
182
+ def setup_and_load_data():
183
+ repo_url = "https://github.com/RextonRZ/GenAIEyeTrackingCleanedDataset"
184
+ repo_dir = "GenAIEyeTrackingCleanedDataset"
185
+ if not os.path.exists(repo_dir): git.Repo.clone_from(repo_url, repo_dir)
186
+ else: print("Data repository already exists.")
187
+ base_path = repo_dir
188
+ response_file_path = os.path.join(repo_dir, "GenAI Response.xlsx")
189
+ analyzer = EnhancedAIvsRealGazeAnalyzer().load_and_process_data(base_path, response_file_path)
190
+ return analyzer
191
+
192
+ analyzer = setup_and_load_data()
193
+
194
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
195
+ gr.Markdown("# Interactive Dashboard: AI vs. Real Gaze Analysis")
196
+ with gr.Tabs():
197
+ with gr.TabItem("📊 RQ1: Viewing Time vs. Correctness"):
198
+ with gr.Row():
199
+ with gr.Column(scale=1):
200
+ rq1_metric_dropdown=gr.Dropdown(choices=analyzer.time_metrics, label="Select a Time-Based Metric", value=analyzer.time_metrics[0] if analyzer.time_metrics else None)
201
+ rq1_summary_output=gr.Markdown(label="Statistical Summary")
202
+ with gr.Column(scale=2):
203
+ rq1_plot_output=gr.Plot(label="Metric Comparison")
204
+
205
+ with gr.TabItem("🤖 RQ2: Predicting Correctness from Gaze"):
206
+ with gr.Row():
207
+ with gr.Column(scale=1):
208
+ gr.Markdown("#### Tune Model Hyperparameters")
209
+ rq2_test_size_slider=gr.Slider(minimum=0.1, maximum=0.5, step=0.05, value=0.3, label="Test Set Size")
210
+ rq2_estimators_slider=gr.Slider(minimum=10, maximum=200, step=10, value=100, label="Number of Trees")
211
+ rq2_status = gr.Markdown("Train a model to enable the Gaze Playback tab.")
212
+ with gr.Column(scale=2):
213
+ rq2_summary_output=gr.Markdown(label="Model Performance Summary")
214
+ rq2_table_output=gr.Dataframe(label="Classification Report", interactive=False)
215
+ rq2_plot_output=gr.Plot(label="Feature Importance")
216
+
217
+ with gr.TabItem("👁️ Gaze Playback & Real-Time Prediction"):
218
+ gr.Markdown("### See the Prediction Evolve with Every Glance!")
219
+ with gr.Row():
220
+ with gr.Column(scale=1):
221
+ playback_participant=gr.Dropdown(choices=analyzer.valid_playback_participants, label="1. Select a Participant")
222
+ playback_question=gr.Dropdown(choices=[], label="2. Select a Question", interactive=False)
223
+ gr.Markdown("3. Use the slider to play back fixations one by one.")
224
+ playback_slider=gr.Slider(minimum=0, maximum=1, step=1, value=0, label="Fixation Number", interactive=False)
225
+ playback_summary=gr.Markdown(label="Trial Info")
226
+ with gr.Column(scale=2):
227
+ playback_plot=gr.Plot(label="Gaze Playback & Live Prediction")
228
+
229
+ # --- WIRING FOR ALL TABS ---
230
+ outputs_rq2 = [rq2_summary_output, rq2_table_output, rq2_plot_output, rq2_status]
231
+ outputs_playback = [playback_summary, playback_plot, playback_slider]
232
+
233
+ rq1_metric_dropdown.change(fn=analyzer.analyze_rq1_metric, inputs=rq1_metric_dropdown, outputs=[rq1_plot_output, rq1_summary_output])
234
+
235
+ rq2_test_size_slider.release(fn=analyzer.run_prediction_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)
236
+ rq2_estimators_slider.release(fn=analyzer.run_prediction_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)
237
+
238
+ # Chained dropdown logic for Tab 3
239
+ playback_participant.change(fn=analyzer.update_question_dropdown, inputs=playback_participant, outputs=playback_question)
240
+ playback_question.change(lambda: 0, None, playback_slider).then(fn=analyzer.generate_gaze_playback, inputs=[playback_participant, playback_question, playback_slider], outputs=outputs_playback)
241
+ playback_slider.release(fn=analyzer.generate_gaze_playback, inputs=[playback_participant, playback_question, playback_slider], outputs=outputs_playback)
242
+
243
+ demo.load(fn=analyzer.analyze_rq1_metric, inputs=rq1_metric_dropdown, outputs=[rq1_plot_output, rq1_summary_output])
244
+ demo.load(fn=analyzer.run_prediction_model, inputs=[rq2_test_size_slider, rq2_estimators_slider], outputs=outputs_rq2)
245
 
246
  if __name__ == "__main__":
247
  demo.launch()