mgbam committed on
Commit
5bd4d74
·
verified ·
1 Parent(s): 4ef5154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -183
app.py CHANGED
@@ -1,12 +1,13 @@
1
  # Odyssey - The AI Data Science Workspace
2
- # A demonstration of a state-of-the-art, AI-native analytic environment.
 
3
 
4
  import gradio as gr
5
  import pandas as pd
6
  import numpy as np
7
  import plotly.express as px
8
  import plotly.graph_objects as go
9
- import io, os, json, base64, logging, warnings, pickle, uuid
10
  from contextlib import redirect_stdout
11
  from datetime import datetime
12
 
@@ -18,13 +19,20 @@ from sklearn.metrics import roc_curve, auc, confusion_matrix, r2_score, mean_squ
18
  from sklearn.preprocessing import LabelEncoder
19
  from sklearn.impute import KNNImputer
20
 
 
 
 
 
 
 
 
21
  # --- Configuration ---
22
  warnings.filterwarnings('ignore')
23
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
24
 
25
  # --- UI Theme & Icons ---
26
  THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
27
- body_background_fill="radial-gradient(circle, rgba(20,20,80,1) 0%, rgba(0,0,10,1) 100%);",
28
  block_label_background_fill="rgba(255,255,255,0.05)",
29
  block_background_fill="rgba(255,255,255,0.05)",
30
  button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
@@ -33,57 +41,49 @@ THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral
33
  )
34
  ICONS = {"overview": "๐Ÿ”ญ", "medic": "๐Ÿงช", "launchpad": "๐Ÿš€", "copilot": "๐Ÿ’ก", "export": "๐Ÿ“„"}
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # --- Core State & Project Management ---
37
  def init_state():
38
- """Initializes a blank global state."""
39
  return {
40
- "project_name": None,
41
- "df_original": None,
42
- "df_modified": None,
43
- "metadata": None,
44
- "insights": None,
45
- "chat_history": [],
46
- "dynamic_dashboards": {}
47
  }
48
 
49
  def save_project(state):
50
- """Saves the entire application state to a .osyssey file."""
51
  if not state or not state.get("project_name"):
52
  return gr.update(value="Project needs a name to save.", interactive=True)
53
 
54
  filename = f"{state['project_name']}.odyssey"
55
- # Convert dataframes to pickle strings for serialization
56
- state_to_save = state.copy()
57
- if state_to_save['df_original'] is not None:
58
- state_to_save['df_original'] = state_to_save['df_original'].to_pickle()
59
- if state_to_save['df_modified'] is not None:
60
- state_to_save['df_modified'] = state_to_save['df_modified'].to_pickle()
61
-
62
  with open(filename, "wb") as f:
63
- pickle.dump(state_to_save, f)
64
-
65
- return gr.update(value=f"Project saved to {filename}", interactive=True)
66
 
67
  def load_project(file_obj):
68
  """Loads a .odyssey file into the application state."""
69
  if not file_obj: return init_state()
70
  with open(file_obj.name, "rb") as f:
71
- loaded_state = pickle.load(f)
72
-
73
- # Unpickle dataframes
74
- if loaded_state['df_original'] is not None:
75
- loaded_state['df_original'] = pd.read_pickle(io.BytesIO(loaded_state['df_original']))
76
- if loaded_state['df_modified'] is not None:
77
- loaded_state['df_modified'] = pd.read_pickle(io.BytesIO(loaded_state['df_modified']))
78
-
79
- return loaded_state
80
 
81
  def prime_data(file_obj, project_name):
82
  """Main function to load a new CSV, analyze it, and set the initial state."""
83
  if not file_obj: return init_state()
84
  df = pd.read_csv(file_obj.name)
85
 
86
- # Smart type conversion
87
  for col in df.select_dtypes(include=['object']).columns:
88
  try:
89
  df[col] = pd.to_datetime(df[col], errors='raise')
@@ -96,35 +96,29 @@ def prime_data(file_obj, project_name):
96
 
97
  return {
98
  "project_name": project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}",
99
- "df_original": df,
100
- "df_modified": df.copy(),
101
- "metadata": metadata,
102
- "insights": insights,
103
- "chat_history": [],
104
- "dynamic_dashboards": {}
105
  }
106
 
107
  def extract_metadata(df):
108
  """Utility to get schema and column types."""
109
  return {
110
- 'shape': df.shape,
111
- 'columns': df.columns.tolist(),
112
  'numeric': df.select_dtypes(include=np.number).columns.tolist(),
113
  'categorical': df.select_dtypes(include=['object', 'category']).columns.tolist(),
114
  'datetime': df.select_dtypes(include='datetime').columns.tolist(),
115
  'dtypes': df.dtypes.apply(lambda x: x.name).to_dict()
116
  }
117
 
118
- # --- Helios Overview Engine ---
 
119
  def run_helios_engine(df, metadata):
120
- """The proactive analysis engine."""
121
  insights = {}
122
- # Missing Data
123
  missing = df.isnull().sum()
124
  insights['missing_data'] = missing[missing > 0].sort_values(ascending=False)
125
- # High Cardinality
126
  insights['high_cardinality'] = {c: df[c].nunique() for c in metadata['categorical'] if df[c].nunique() > 50}
127
- # Outlier Detection
128
  outliers = {}
129
  for col in metadata['numeric']:
130
  Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
@@ -132,7 +126,7 @@ def run_helios_engine(df, metadata):
132
  count = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
133
  if count > 0: outliers[col] = count
134
  insights['outliers'] = outliers
135
- # ML Target Suggestions
136
  suggestions = []
137
  for col in metadata['categorical']:
138
  if df[col].nunique() == 2: suggestions.append(f"{col} (Classification)")
@@ -141,57 +135,21 @@ def run_helios_engine(df, metadata):
141
  insights['ml_suggestions'] = suggestions
142
  return insights
143
 
144
- # --- Asclepius Data Lab Handlers ---
145
- def medic_preview_imputation(state, col, num_method, cat_method):
146
- if not col or col not in state['df_modified'].columns: return None
147
- df_mod = state['df_modified'].copy()
148
-
149
- if col in state['metadata']['numeric']:
150
- if num_method == 'KNN':
151
- imputer = KNNImputer(n_neighbors=5)
152
- df_mod[col] = imputer.fit_transform(df_mod[[col]])
153
- else:
154
- value = df_mod[col].mean() if num_method == 'mean' else df_mod[col].median()
155
- df_mod[col].fillna(value, inplace=True)
156
-
157
- fig = go.Figure()
158
- fig.add_trace(go.Histogram(x=state['df_original'][col], name='Original', opacity=0.7))
159
- fig.add_trace(go.Histogram(x=df_mod[col], name='Imputed', opacity=0.7))
160
- fig.update_layout(barmode='overlay', title_text=f"Distribution for '{col}'", legend_title_text='Dataset')
161
- return fig
162
-
163
- elif col in state['metadata']['categorical']:
164
- if cat_method == "Create 'Missing' Category":
165
- df_mod[col] = df_mod[col].cat.add_categories("Missing").fillna("Missing") if hasattr(df_mod[col], 'cat') else df_mod[col].fillna("Missing")
166
- else: # Mode
167
- df_mod[col].fillna(df_mod[col].mode()[0], inplace=True)
168
-
169
- fig = go.Figure()
170
- fig.add_trace(go.Bar(x=state['df_original'][col].value_counts().index, y=state['df_original'][col].value_counts().values, name='Original'))
171
- fig.add_trace(go.Bar(x=df_mod[col].value_counts().index, y=df_mod[col].value_counts().values, name='Imputed'))
172
- return fig
173
- return None
174
-
175
- # --- Prometheus Launchpad Handlers ---
176
  def prometheus_run_model(state, target, features, model_name):
 
177
  if not target or not features: return None, None, "Select target and features."
178
  df = state['df_modified'].copy()
179
  df.dropna(subset=[target] + features, inplace=True)
180
 
181
- le_map = {}
182
  for col in [target] + features:
183
  if df[col].dtype.name in ['category', 'object']:
184
- le = LabelEncoder()
185
- df[col] = le.fit_transform(df[col])
186
- le_map[col] = le
187
 
188
  X, y = df[features], df[target]
189
  problem_type = "Classification" if y.nunique() <= 10 else "Regression"
190
 
191
- MODELS = {
192
- "Classification": {"Random Forest": RandomForestClassifier, "Logistic Regression": LogisticRegression},
193
- "Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression}
194
- }
195
  if model_name not in MODELS[problem_type]: return None, None, "Invalid model for this problem type."
196
 
197
  model = MODELS[problem_type][model_name](random_state=42)
@@ -199,63 +157,95 @@ def prometheus_run_model(state, target, features, model_name):
199
  if problem_type == "Classification":
200
  scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
201
  report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
202
-
203
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
204
  model.fit(X_train, y_train)
205
-
206
- # ROC Curve
207
  y_prob = model.predict_proba(X_test)[:, 1]
208
  fpr, tpr, _ = roc_curve(y_test, y_prob)
209
- roc_auc = auc(fpr, tpr)
210
- fig1 = go.Figure(data=go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC curve (area = {roc_auc:.2f})'))
211
- fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random Chance')
212
  fig1.update_layout(title="ROC Curve")
213
-
214
- # Feature Importance
215
- if hasattr(model, 'feature_importances_'):
216
- fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
217
- fig2 = px.bar(fi, title="Feature Importance")
218
- else: fig2 = go.Figure().update_layout(title="Feature Importance (Not available for this model)")
219
-
220
- return fig1, fig2, report
221
  else: # Regression
222
  scores = cross_val_score(model, X, y, cv=5, scoring='r2')
223
  report = f"**Cross-Validated Rยฒ Score:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
224
-
225
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
226
  model.fit(X_train, y_train)
227
  preds = model.predict(X_test)
228
-
229
- # Residuals Plot
230
  residuals = y_test - preds
231
- fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted Plot", labels={'x': 'Predicted Values', 'y': 'Residuals'})
232
  fig1.add_hline(y=0, line_dash="dash")
233
 
234
- # Feature Importance
235
- if hasattr(model, 'feature_importances_'):
236
- fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
237
- fig2 = px.bar(fi, title="Feature Importance")
238
- else: fig2 = go.Figure().update_layout(title="Feature Importance (Not available for this model)")
239
-
240
- return fig1, fig2, report
241
 
242
- # --- Athena Co-pilot Handlers ---
243
  def athena_respond(user_message, history, state, api_key):
244
- # Main co-pilot logic
245
- pass # This would contain the full logic from previous examples
 
 
 
 
 
 
 
 
 
 
 
246
 
247
- def render_dynamic_dashboard(state, dashboard_id):
248
- """Renders a dynamically generated dashboard from the state."""
249
- # This is a placeholder for the advanced dashboard rendering logic.
250
- # In a real scenario, this would execute the Gradio code string stored in state.
251
- if dashboard_id in state['dynamic_dashboards']:
252
- # This is where we would dynamically create the Gradio components
253
- # For this example, we'll return a placeholder
254
- return gr.Markdown(f"### Dashboard: {dashboard_id}\n(Dynamic rendering placeholder)")
255
- return gr.Markdown("Dashboard not found.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- # --- UI Builder Functions ---
258
  def build_ui():
 
259
  with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
260
  state = gr.State(init_state())
261
 
@@ -263,10 +253,10 @@ def build_ui():
263
  # Left Sidebar - Command Center
264
  with gr.Column(scale=1):
265
  gr.Markdown("# ๐Ÿฆ‰ Odyssey")
266
-
267
  with gr.Accordion("๐Ÿ“‚ Project", open=True):
268
  project_name_input = gr.Textbox(label="Project Name", value="New_Project")
269
  file_input = gr.File(label="Upload CSV", file_types=[".csv"])
 
270
  with gr.Row():
271
  save_btn = gr.Button("Save")
272
  load_btn = gr.UploadButton("Load .odyssey")
@@ -274,44 +264,24 @@ def build_ui():
274
 
275
  # Navigation buttons
276
  overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
277
- medic_btn = gr.Button(f"{ICONS['medic']} Asclepius Data Lab")
278
  launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
279
  copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
280
- export_btn = gr.Button(f"{ICONS['export']} Export Report")
281
-
282
- # Global Info
283
- with gr.Accordion("Global Info", open=False):
284
- file_info_md = gr.Markdown("No file loaded.")
285
 
286
  # Right Panel - Main Workspace
287
  with gr.Column(scale=4):
288
  # --- Helios Overview Panel ---
289
  with gr.Column(visible=True) as overview_panel:
290
  gr.Markdown(f"# {ICONS['overview']} Helios Overview")
291
- gr.Markdown("A proactive, high-level summary of your dataset.")
292
- # Interactive dashboard components would go here
293
- helios_report_md = gr.Markdown("Upload data to begin analysis.")
294
 
295
- # --- Asclepius Data Lab Panel ---
296
- with gr.Column(visible=False) as medic_panel:
297
- gr.Markdown(f"# {ICONS['medic']} Asclepius Data Lab")
298
- gr.Markdown("Interactively clean and prepare your data.")
299
- # UI components for Data Medic
300
- medic_col_select = gr.Dropdown(label="Select Column to Clean")
301
- with gr.Row():
302
- medic_num_method = gr.Radio(['mean', 'median', 'KNN'], label="Numeric Imputation", value='mean')
303
- medic_cat_method = gr.Radio(['mode', "Create 'Missing' Category"], label="Categorical Imputation", value='mode')
304
- medic_preview_plot = gr.Plot()
305
- medic_apply_btn = gr.Button("Apply Changes to Session")
306
-
307
  # --- Prometheus Launchpad Panel ---
308
  with gr.Column(visible=False) as launchpad_panel:
309
  gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
310
- gr.Markdown("Train, evaluate, and understand predictive models.")
311
- # UI components for Launchpad
312
  with gr.Row():
313
  lp_target = gr.Dropdown(label="๐ŸŽฏ Target")
314
- lp_features = gr.Multiselect(label="โœจ Features")
 
315
  lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="๐Ÿง  Model")
316
  lp_run_btn = gr.Button("๐Ÿš€ Launch Model Training (with CV)")
317
  lp_report_md = gr.Markdown()
@@ -322,54 +292,59 @@ def build_ui():
322
  # --- Athena Co-pilot Panel ---
323
  with gr.Column(visible=False) as copilot_panel:
324
  gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
325
- gr.Markdown("Your collaborative AI data scientist. Ask anything.")
326
- # Chatbot UI
327
- chatbot = gr.Chatbot(height=500)
328
- with gr.Accordion("AI Generated Dashboard", open=False) as dynamic_dash_accordion:
329
- dynamic_dash_output = gr.Group() # Placeholder for dynamic content
330
- chat_input = gr.Textbox(label="Your Request")
331
  chat_submit = gr.Button("Send", variant="primary")
332
 
333
  # --- Event Handling ---
334
-
335
- # Panel Navigation
336
- panels = [overview_panel, medic_panel, launchpad_panel, copilot_panel]
337
  def switch_panel(btn_idx):
338
  return [gr.update(visible=i == btn_idx) for i in range(len(panels))]
339
 
340
  overview_btn.click(lambda: switch_panel(0), None, panels)
341
- medic_btn.click(lambda: switch_panel(1), None, panels)
342
- launchpad_btn.click(lambda: switch_panel(2), None, panels)
343
- copilot_btn.click(lambda: switch_panel(3), None, panels)
344
 
345
- # File Upload Logic
346
- def on_upload(state, file, name):
347
- new_state = prime_data(file, name)
348
- # Update all UI components based on the new state
349
  helios_md = "No data loaded."
350
- if new_state.get('insights'):
351
- helios_md = f"### {ICONS['ml_suggestions']} ML Suggestions\n" + "\n".join([f"- `{s}`" for s in new_state['insights']['ml_suggestions']])
352
- # ... Add more sections for a full report
 
 
 
 
 
 
353
 
354
- file_info = f"**File:** `{os.path.basename(file.name)}`\n\n**Shape:** `{new_state['metadata']['shape']}`"
355
-
356
- all_cols = new_state['metadata']['columns']
357
- missing_cols = new_state['insights']['missing_data'].index.tolist()
358
-
359
- return new_state, helios_md, file_info, gr.update(choices=missing_cols), gr.update(choices=all_cols), gr.update(choices=all_cols)
 
 
360
 
361
- file_input.upload(on_upload, [state, file_input, project_name_input], [state, helios_report_md, file_info_md, medic_col_select, lp_target, lp_features])
362
-
363
- # Project Management
 
 
 
364
  save_btn.click(save_project, state, project_status)
365
 
366
- # Asclepius Live Preview
367
- medic_col_select.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
368
- medic_num_method.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
369
- medic_cat_method.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
370
-
371
- # Prometheus Model Training
372
  lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])
 
 
 
 
 
 
373
 
374
  return demo
375
 
 
1
  # Odyssey - The AI Data Science Workspace
2
+ # A state-of-the-art, AI-native analytic environment.
3
+ # This script is a complete, self-contained Gradio application.
4
 
5
  import gradio as gr
6
  import pandas as pd
7
  import numpy as np
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
+ import io, os, json, pickle, logging, warnings, uuid
11
  from contextlib import redirect_stdout
12
  from datetime import datetime
13
 
 
19
  from sklearn.preprocessing import LabelEncoder
20
  from sklearn.impute import KNNImputer
21
 
22
+ # Optional: For AI features
23
+ try:
24
+ import google.generativeai as genai
25
+ except ImportError:
26
+ print("Warning: 'google-generativeai' not found. AI features will be disabled.")
27
+ genai = None
28
+
29
  # --- Configuration ---
30
  warnings.filterwarnings('ignore')
31
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
32
 
33
  # --- UI Theme & Icons ---
34
  THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
35
+ body_background_fill="radial-gradient(circle, rgba(10,20,50,1) 0%, rgba(0,0,10,1) 100%);",
36
  block_label_background_fill="rgba(255,255,255,0.05)",
37
  block_background_fill="rgba(255,255,255,0.05)",
38
  button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
 
41
  )
42
  ICONS = {"overview": "๐Ÿ”ญ", "medic": "๐Ÿงช", "launchpad": "๐Ÿš€", "copilot": "๐Ÿ’ก", "export": "๐Ÿ“„"}
43
 
44
+ # --- Helper Functions ---
45
def safe_exec(code_string: str, local_vars: dict) -> tuple:
    """Execute a string of Python code and capture what it prints.

    SECURITY: despite the name, this is NOT a sandbox. ``exec`` runs the code
    with the full privileges of this process (file system, network, imports).
    Only feed it code whose origin you are prepared to trust.

    Args:
        code_string: Python source to run.
        local_vars: Names made available to the code (e.g. ``df``, ``pd``).
            Names the code binds or rebinds are written back into this dict.

    Returns:
        ``(stdout, fig, df_result, error)``. On success ``error`` is ``None``
        and ``fig`` / ``df_result`` are whatever the code assigned to those
        names (``None`` if it did not). On failure the first three items are
        ``None`` and ``error`` is an ``"Execution Error: ..."`` string.
    """
    # Run in ONE namespace. With separate globals/locals dicts, functions,
    # lambdas and nested scopes defined by the executed code resolve free
    # names through globals only, so they could not see `df`, `pd`, ...
    # Copying the module globals also keeps the code from rebinding them.
    namespace = {**globals(), **local_vars}
    before = dict(namespace)

    output_buffer = io.StringIO()
    error = None
    try:
        # NOTE: redirect_stdout swaps sys.stdout for the whole process, so
        # output printed by other threads during the call is captured too.
        with redirect_stdout(output_buffer):
            exec(code_string, namespace)
    except Exception as e:
        error = f"Execution Error: {str(e)}"

    # Mirror the original side effect: anything the code (re)bound becomes
    # visible to the caller through `local_vars`.
    local_vars.update({
        name: value
        for name, value in namespace.items()
        if name not in before or before[name] is not value
    })

    if error is not None:
        return None, None, None, error
    return (
        output_buffer.getvalue(),
        local_vars.get('fig'),
        local_vars.get('df_result'),
        None,
    )
57
+
58
  # --- Core State & Project Management ---
def init_state():
    """Build the blank session state used before any project is loaded.

    Every scalar slot starts as ``None``; the chat transcript starts as a
    fresh empty list so separate sessions never share one list object.
    """
    blank = dict.fromkeys(
        ("project_name", "df_original", "df_modified", "metadata", "insights")
    )
    blank["chat_history"] = []
    return blank
65
 
def save_project(state):
    """Saves the entire application state to a .odyssey file.

    The file is written to the current working directory as
    ``<project_name>.odyssey`` using ``pickle``.

    Args:
        state: The session state dictionary (see ``init_state``).

    Returns:
        A ``gr.update`` carrying a status message for the UI: success,
        a missing/unusable project name, or the reason the write failed.
    """
    if not state or not state.get("project_name"):
        return gr.update(value="Project needs a name to save.", interactive=True)

    # The name comes from a free-text box. Keep only its last path component
    # so values like "../x" or "/tmp/x" cannot write outside the working dir.
    raw_name = str(state["project_name"]).strip().replace("\\", "/").rstrip("/")
    safe_name = os.path.basename(raw_name)
    if safe_name in ("", ".", ".."):
        return gr.update(value="Project needs a name to save.", interactive=True)

    filename = f"{safe_name}.odyssey"
    try:
        with open(filename, "wb") as f:
            pickle.dump(state, f)
    except (OSError, pickle.PicklingError) as exc:
        # Report the failure in the status box instead of an unhandled error.
        logging.exception("Failed to save project to %s", filename)
        return gr.update(value=f"Could not save project: {exc}", interactive=True)
    return gr.update(value=f"✅ Project saved to {filename}", interactive=True)
 
75
 
def load_project(file_obj):
    """Loads a .odyssey file into the application state.

    Args:
        file_obj: The uploaded file as delivered by Gradio - either an object
            exposing the temp path as ``.name`` or the path string itself.
            A falsy value yields a blank state.

    Returns:
        The saved state dictionary, with any key missing from the file
        filled in from ``init_state()``.

    Raises:
        ValueError: If the file unpickles to something other than a dict.
    """
    if not file_obj:
        return init_state()

    # Depending on the Gradio version/config the handler receives a tempfile
    # wrapper or a plain path string; accept both.
    path = getattr(file_obj, "name", file_obj)

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load .odyssey files from sources you trust; a data-only format
    # would be needed before accepting uploads from untrusted users.
    with open(path, "rb") as f:
        loaded = pickle.load(f)

    if not isinstance(loaded, dict):
        raise ValueError("Not a valid .odyssey project file: expected a saved state dictionary.")

    # Back-fill defaults so a file lacking some keys does not cause KeyErrors
    # in handlers that index the state directly.
    return {**init_state(), **loaded}
 
 
 
 
 
 
 
 
81
 
82
  def prime_data(file_obj, project_name):
83
  """Main function to load a new CSV, analyze it, and set the initial state."""
84
  if not file_obj: return init_state()
85
  df = pd.read_csv(file_obj.name)
86
 
 
87
  for col in df.select_dtypes(include=['object']).columns:
88
  try:
89
  df[col] = pd.to_datetime(df[col], errors='raise')
 
96
 
97
  return {
98
  "project_name": project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}",
99
+ "df_original": df, "df_modified": df.copy(), "metadata": metadata,
100
+ "insights": insights, "chat_history": []
 
 
 
 
101
  }
102
 
def extract_metadata(df):
    """Utility to get schema and column types.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        A dict with:
          - ``shape``: ``df.shape``
          - ``columns``: all column names, in order
          - ``numeric`` / ``categorical`` / ``datetime``: column names grouped
            by dtype family (object and category columns count as categorical)
          - ``dtypes``: mapping of column name to dtype name
    """
    return {
        'shape': df.shape,
        'columns': df.columns.tolist(),
        'numeric': df.select_dtypes(include=np.number).columns.tolist(),
        'categorical': df.select_dtypes(include=['object', 'category']).columns.tolist(),
        # 'datetime' alone matches only tz-naive columns; tz-aware ones need
        # the separate 'datetimetz' selector or they land in no group at all.
        'datetime': df.select_dtypes(include=['datetime', 'datetimetz']).columns.tolist(),
        'dtypes': {col: dtype.name for col, dtype in df.dtypes.items()},
    }
112
 
113
+ # --- Module-Specific Handlers ---
114
+
115
  def run_helios_engine(df, metadata):
116
+ """The proactive analysis engine for the Helios Overview."""
117
  insights = {}
 
118
  missing = df.isnull().sum()
119
  insights['missing_data'] = missing[missing > 0].sort_values(ascending=False)
 
120
  insights['high_cardinality'] = {c: df[c].nunique() for c in metadata['categorical'] if df[c].nunique() > 50}
121
+
122
  outliers = {}
123
  for col in metadata['numeric']:
124
  Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
 
126
  count = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
127
  if count > 0: outliers[col] = count
128
  insights['outliers'] = outliers
129
+
130
  suggestions = []
131
  for col in metadata['categorical']:
132
  if df[col].nunique() == 2: suggestions.append(f"{col} (Classification)")
 
135
  insights['ml_suggestions'] = suggestions
136
  return insights
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  def prometheus_run_model(state, target, features, model_name):
139
+ """Trains and evaluates a model in the Prometheus Launchpad."""
140
  if not target or not features: return None, None, "Select target and features."
141
  df = state['df_modified'].copy()
142
  df.dropna(subset=[target] + features, inplace=True)
143
 
 
144
  for col in [target] + features:
145
  if df[col].dtype.name in ['category', 'object']:
146
+ df[col] = LabelEncoder().fit_transform(df[col])
 
 
147
 
148
  X, y = df[features], df[target]
149
  problem_type = "Classification" if y.nunique() <= 10 else "Regression"
150
 
151
+ MODELS = {"Classification": {"Random Forest": RandomForestClassifier, "Logistic Regression": LogisticRegression},
152
+ "Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression}}
 
 
153
  if model_name not in MODELS[problem_type]: return None, None, "Invalid model for this problem type."
154
 
155
  model = MODELS[problem_type][model_name](random_state=42)
 
157
  if problem_type == "Classification":
158
  scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
159
  report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
 
160
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
161
  model.fit(X_train, y_train)
 
 
162
  y_prob = model.predict_proba(X_test)[:, 1]
163
  fpr, tpr, _ = roc_curve(y_test, y_prob)
164
+ fig1 = go.Figure(data=go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {auc(fpr, tpr):.2f})'))
165
+ fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random')
 
166
  fig1.update_layout(title="ROC Curve")
 
 
 
 
 
 
 
 
167
  else: # Regression
168
  scores = cross_val_score(model, X, y, cv=5, scoring='r2')
169
  report = f"**Cross-Validated Rยฒ Score:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
 
170
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
171
  model.fit(X_train, y_train)
172
  preds = model.predict(X_test)
 
 
173
  residuals = y_test - preds
174
+ fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted", labels={'x': 'Predicted', 'y': 'Residuals'})
175
  fig1.add_hline(y=0, line_dash="dash")
176
 
177
+ if hasattr(model, 'feature_importances_'):
178
+ fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
179
+ fig2 = px.bar(fi, title="Feature Importance")
180
+ else:
181
+ fig2 = go.Figure().update_layout(title="Feature Importance (Not available)")
182
+
183
+ return fig1, fig2, report
184
 
 
def athena_respond(user_message, history, state, api_key):
    """Handles the chat interaction with the AI Co-pilot.

    This is a generator: every update is delivered with ``yield`` as a
    ``(history, fig, df_result, state)`` tuple. It asks Gemini for a JSON
    object with ``thought`` and ``code``, shows the thought, runs the code
    through ``safe_exec`` and then shows the captured output.

    Args:
        user_message: The text the user typed.
        history: Chat transcript as a list of ``(user, bot)`` tuples; it is
            extended in place.
        state: The session state dictionary; ``chat_history`` is updated
            after a completed exchange.
        api_key: Gemini API key entered in the UI.

    Yields:
        ``(history, fig, df_result, state)`` - ``fig`` and ``df_result`` are
        ``None`` except on the final update of a successful run.
    """
    if history is None:
        history = []

    # Because this function contains `yield`, a `return <value>` would end
    # the generator without emitting anything. Early exits must yield.
    if not genai:
        history.append((user_message, "Google AI library not installed. Cannot use Athena."))
        yield history, None, None, state
        return
    if not api_key:
        history.append((user_message, "Please enter your Gemini API key to use Athena."))
        yield history, None, None, state
        return

    df = state.get('df_modified') if state else None
    if df is None:
        history.append((user_message, "Please upload a CSV or load a project before asking Athena."))
        yield history, None, None, state
        return

    history.append((user_message, None))

    # DataFrame.info() writes to a buffer and returns None, so capture its
    # text explicitly instead of interpolating the return value.
    info_buffer = io.StringIO()
    df.info(verbose=False, buf=info_buffer)
    df_info = info_buffer.getvalue()

    prompt = f"""
You are 'Athena', an AI data scientist. Your goal is to help a user by writing and executing Python code on a pandas DataFrame named `df`.

**DataFrame Info:**
{df_info}

**Instructions:**
1. Analyze the user's request: '{user_message}'.
2. Formulate a plan (thought).
3. Write Python code to execute the plan. You can use `pandas as pd`, `numpy as np`, and `plotly.express as px`.
4. To show a plot, assign it to a variable `fig`.
5. To show a dataframe, assign it to a variable `df_result`.
6. Use `print()` for text output.
7. **NEVER** modify `df` in place.
8. Respond **ONLY** with a single, valid JSON object with keys "thought" and "code".

**Your JSON Response:**
"""
    try:
        # Configure the API inside the try so setup failures are reported in
        # the chat like any other model error.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')

        response = model.generate_content(prompt)
        # The model may wrap its JSON in a Markdown code fence; strip it.
        response_json = json.loads(response.text.strip().replace("```json", "").replace("```", ""))
        thought = response_json.get("thought", "Thinking...")
        code_to_run = response_json.get("code", "print('No code generated.')")

        bot_thinking = f"🧠 **Thinking:** *{thought}*"
        history[-1] = (user_message, bot_thinking)
        yield history, None, None, state

        # Hand the generated code a copy: the "never modify df" rule in the
        # prompt is only a request, and the session data must stay intact.
        local_vars = {'df': df.copy(), 'px': px, 'pd': pd, 'np': np}
        stdout, fig_result, df_result, error = safe_exec(code_to_run, local_vars)

        bot_response = bot_thinking + "\n\n---\n\n"
        if error:
            bot_response += f"💥 **Error:**\n```\n{error}\n```"
        if stdout:
            bot_response += f"📋 **Output:**\n```\n{stdout}\n```"
        produced_nothing = (
            not error
            and not stdout
            and fig_result is None
            and not isinstance(df_result, pd.DataFrame)
        )
        if produced_nothing:
            bot_response += "✅ Code executed, but produced no direct output."

        history[-1] = (user_message, bot_response)
        state['chat_history'] = history  # Persist chat history
        yield history, fig_result, df_result, state

    except Exception as e:
        # Top-level boundary for the handler: log the traceback and surface
        # the failure in the chat instead of breaking the UI event.
        logging.exception("Athena request failed")
        error_msg = f"A critical error occurred with the AI model: {e}"
        history[-1] = (user_message, error_msg)
        yield history, None, None, state
245
 
246
+ # --- UI Builder ---
247
  def build_ui():
248
+ """Constructs the entire Gradio application interface."""
249
  with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
250
  state = gr.State(init_state())
251
 
 
253
  # Left Sidebar - Command Center
254
  with gr.Column(scale=1):
255
  gr.Markdown("# ๐Ÿฆ‰ Odyssey")
 
256
  with gr.Accordion("๐Ÿ“‚ Project", open=True):
257
  project_name_input = gr.Textbox(label="Project Name", value="New_Project")
258
  file_input = gr.File(label="Upload CSV", file_types=[".csv"])
259
+ api_key_input = gr.Textbox(label="๐Ÿ”‘ Gemini API Key", type="password", placeholder="Enter key...")
260
  with gr.Row():
261
  save_btn = gr.Button("Save")
262
  load_btn = gr.UploadButton("Load .odyssey")
 
264
 
265
  # Navigation buttons
266
  overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
 
267
  launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
268
  copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
269
+ export_btn = gr.Button(f"{ICONS['export']} Export Report", visible=False)
 
 
 
 
270
 
271
  # Right Panel - Main Workspace
272
  with gr.Column(scale=4):
273
  # --- Helios Overview Panel ---
274
  with gr.Column(visible=True) as overview_panel:
275
  gr.Markdown(f"# {ICONS['overview']} Helios Overview")
276
+ helios_report_md = gr.Markdown("Upload a CSV and provide a project name to begin your Odyssey.")
 
 
277
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  # --- Prometheus Launchpad Panel ---
279
  with gr.Column(visible=False) as launchpad_panel:
280
  gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
 
 
281
  with gr.Row():
282
  lp_target = gr.Dropdown(label="๐ŸŽฏ Target")
283
+ # CORRECTED LINE: Use gr.Dropdown with multiselect=True
284
+ lp_features = gr.Dropdown(label="โœจ Features", multiselect=True)
285
  lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="๐Ÿง  Model")
286
  lp_run_btn = gr.Button("๐Ÿš€ Launch Model Training (with CV)")
287
  lp_report_md = gr.Markdown()
 
292
  # --- Athena Co-pilot Panel ---
293
  with gr.Column(visible=False) as copilot_panel:
294
  gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
295
+ chatbot = gr.Chatbot(height=500, label="Chat History")
296
+ with gr.Accordion("AI Generated Results", open=True):
297
+ copilot_fig_output = gr.Plot()
298
+ copilot_df_output = gr.DataFrame(interactive=False)
299
+ chat_input = gr.Textbox(label="Your Request", placeholder="e.g., 'What's the correlation between all numeric columns?'")
 
300
  chat_submit = gr.Button("Send", variant="primary")
301
 
302
  # --- Event Handling ---
303
+ panels = [overview_panel, launchpad_panel, copilot_panel]
 
 
304
  def switch_panel(btn_idx):
305
  return [gr.update(visible=i == btn_idx) for i in range(len(panels))]
306
 
307
  overview_btn.click(lambda: switch_panel(0), None, panels)
308
+ launchpad_btn.click(lambda: switch_panel(1), None, panels)
309
+ copilot_btn.click(lambda: switch_panel(2), None, panels)
 
310
 
311
+ def on_upload_or_load(state_data):
312
+ """Unified function to update UI after data is loaded or a project is loaded."""
 
 
313
  helios_md = "No data loaded."
314
+ if state_data and state_data.get('insights'):
315
+ insights = state_data['insights']
316
+ md = f"## ๐Ÿ”ญ Proactive Insights for `{state_data.get('project_name')}`\n"
317
+ md += f"Dataset has **{state_data['metadata']['shape'][0]} rows** and **{state_data['metadata']['shape'][1]} columns**.\n\n"
318
+ if suggestions := insights.get('ml_suggestions'):
319
+ md += "### ๐Ÿ”ฎ Potential ML Targets\n" + "\n".join(f"- `{s}`" for s in suggestions) + "\n"
320
+ if not insights.get('missing_data', pd.Series()).empty:
321
+ md += "\n### ๐Ÿ’ง Missing Data\nFound missing values in these columns:\n" + insights['missing_data'].to_frame('Missing Count').to_markdown() + "\n"
322
+ helios_md = md
323
 
324
+ all_cols = state_data.get('metadata', {}).get('columns', [])
325
+ return {
326
+ state: state_data,
327
+ helios_report_md: helios_md,
328
+ lp_target: gr.update(choices=all_cols),
329
+ lp_features: gr.update(choices=all_cols),
330
+ chatbot: state_data.get('chat_history', [])
331
+ }
332
 
333
+ file_input.upload(prime_data, [file_input, project_name_input], state).then(
334
+ on_upload_or_load, state, [state, helios_report_md, lp_target, lp_features, chatbot]
335
+ )
336
+ load_btn.upload(load_project, load_btn, state).then(
337
+ on_upload_or_load, state, [state, helios_report_md, lp_target, lp_features, chatbot]
338
+ )
339
  save_btn.click(save_project, state, project_status)
340
 
 
 
 
 
 
 
341
  lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])
342
+
343
+ chat_submit.click(
344
+ athena_respond,
345
+ [chat_input, chatbot, state, api_key_input],
346
+ [chatbot, copilot_fig_output, copilot_df_output, state]
347
+ ).then(lambda: "", outputs=chat_input)
348
 
349
  return demo
350