mgbam committed on
Commit
b5fce9d
·
verified ·
1 Parent(s): 6f81b01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +356 -286
app.py CHANGED
@@ -1,354 +1,424 @@
1
- # Odyssey - The AI Data Science Workspace
2
- # A state-of-the-art, AI-native analytic environment.
3
- # This script is a complete, self-contained Gradio application.
4
-
5
  import gradio as gr
6
  import pandas as pd
7
  import numpy as np
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
- import io, os, json, pickle, logging, warnings, uuid
 
 
 
 
 
11
  from contextlib import redirect_stdout
12
- from datetime import datetime
13
-
14
- # ML & Preprocessing Imports
15
- from sklearn.model_selection import cross_val_score, train_test_split
16
- from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
17
- from sklearn.linear_model import LogisticRegression, LinearRegression
18
- from sklearn.metrics import roc_curve, auc, confusion_matrix, r2_score, mean_squared_error
19
- from sklearn.preprocessing import LabelEncoder
20
- from sklearn.impute import KNNImputer
21
-
22
- # Optional: For AI features
23
- try:
24
- import google.generativeai as genai
25
- except ImportError:
26
- print("Warning: 'google-generativeai' not found. AI features will be disabled.")
27
- genai = None
28
 
29
  # --- Configuration ---
30
  warnings.filterwarnings('ignore')
31
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
32
-
33
- # --- UI Theme & Icons ---
34
- THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
35
- body_background_fill="radial-gradient(circle, rgba(10,20,50,1) 0%, rgba(0,0,10,1) 100%);",
36
- block_label_background_fill="rgba(255,255,255,0.05)",
37
- block_background_fill="rgba(255,255,255,0.05)",
38
- button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
39
- button_secondary_background_fill="linear-gradient(90deg, #556270 0%, #4ECDC4 100%)",
40
- color_accent_soft="rgba(255,255,255,0.2)"
41
- )
42
- ICONS = {"overview": "🔭", "medic": "🧪", "launchpad": "🚀", "copilot": "💡", "export": "📄"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # --- Helper Functions ---
45
def safe_exec(code_string: str, local_vars: dict) -> tuple:
    """Execute a string of Python code and capture what it produced.

    NOTE(security): this is a plain ``exec`` and NOT a sandbox. The snippet
    runs with the full privileges of this process, so it must only be fed
    code the operator is willing to run.

    Args:
        code_string: Python source to execute.
        local_vars: Names made available to the snippet (e.g. ``df``, ``pd``).
            Names the snippet creates or rebinds are written back into it.

    Returns:
        ``(stdout, fig, df_result, error)``. On success ``error`` is ``None``
        and ``fig`` / ``df_result`` are whatever the snippet bound to those
        names (or ``None``). On failure the first three items are ``None``
        and ``error`` is a message string.
    """
    module_globals = globals()
    # One merged namespace on purpose: with separate globals/locals mappings,
    # functions and lambdas defined by the snippet resolve free names in the
    # globals mapping only and cannot see ``df``/``pd`` passed via locals.
    namespace = {**module_globals, **local_vars}
    output_buffer = io.StringIO()
    try:
        with redirect_stdout(output_buffer):
            exec(code_string, namespace)
    except Exception as e:
        return None, None, None, f"Execution Error: {str(e)}"

    # Mirror everything the snippet created or rebound into the caller's dict
    # so the historical "results land in local_vars" contract still holds.
    for name, value in namespace.items():
        if name == "__builtins__":
            continue
        if name not in module_globals or module_globals[name] is not value:
            local_vars[name] = value

    return output_buffer.getvalue(), local_vars.get('fig'), local_vars.get('df_result'), None
57
 
58
- # --- Core State & Project Management ---
59
def init_state():
    """Return a fresh, empty application-state dictionary.

    Every scalar slot starts as ``None``; ``chat_history`` starts as a new
    empty list so separate states never share the same list object.
    """
    blank_state = dict.fromkeys(
        ("project_name", "df_original", "df_modified", "metadata", "insights")
    )
    blank_state["chat_history"] = []
    return blank_state
65
-
66
def save_project(state):
    """Pickle the whole application state to ``<project_name>.odyssey``.

    Args:
        state: The application-state dict; must contain a non-empty
            ``project_name``.

    Returns:
        A ``gr.update`` carrying a status message (success or failure).
    """
    if not state or not state.get("project_name"):
        return gr.update(value="Project needs a name to save.", interactive=True)

    # The name is typed by the user: keep only the final path component so a
    # value such as "../../somewhere/x" cannot write outside the working dir.
    raw_name = str(state["project_name"]).strip().replace("\\", "/")
    safe_name = os.path.basename(raw_name)
    if safe_name in ("", ".", ".."):
        return gr.update(value="Project needs a name to save.", interactive=True)

    filename = f"{safe_name}.odyssey"
    try:
        with open(filename, "wb") as f:
            pickle.dump(state, f)
    except (OSError, pickle.PicklingError) as e:
        # Report the failure in the status area instead of crashing the handler.
        return gr.update(value=f"❌ Could not save project: {e}", interactive=True)
    return gr.update(value=f"✅ Project saved to {filename}", interactive=True)
75
-
76
def load_project(file_obj):
    """Load a ``.odyssey`` file into an application-state dict.

    Args:
        file_obj: Uploaded file object exposing a ``.name`` path, or a falsy
            value when nothing was uploaded.

    Returns:
        The loaded state merged over the defaults from ``init_state()``; a
        blank state when no file was given or the payload is not a dict.
    """
    if not file_obj:
        return init_state()

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load .odyssey files from trusted sources; consider a data-only
    # format (e.g. parquet + JSON) if projects are ever shared between users.
    with open(file_obj.name, "rb") as f:
        loaded = pickle.load(f)

    # Anything other than a dict would break every later ``state.get(...)``.
    if not isinstance(loaded, dict):
        return init_state()
    # Fill in keys that an older or partial project file may be missing.
    return {**init_state(), **loaded}
81
-
82
def prime_data(file_obj, project_name):
    """Read an uploaded CSV, infer column types, and build the initial state.

    Text columns are first tried as datetimes; those that do not parse are
    turned into categoricals when fewer than half of their values are unique.
    Returns a blank state when no file is supplied.
    """
    if not file_obj:
        return init_state()

    frame = pd.read_csv(file_obj.name)

    text_columns = frame.select_dtypes(include=['object']).columns
    for name in text_columns:
        try:
            frame[name] = pd.to_datetime(frame[name], errors='raise')
        except (ValueError, TypeError):
            unique_ratio = frame[name].nunique() / len(frame)
            if 0.0 < unique_ratio < 0.5:
                frame[name] = frame[name].astype('category')

    schema = extract_metadata(frame)
    findings = run_helios_engine(frame, schema)

    # Fall back to a timestamped name when the user left the field empty.
    resolved_name = project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}"
    return {
        "project_name": resolved_name,
        "df_original": frame,
        "df_modified": frame.copy(),
        "metadata": schema,
        "insights": findings,
        "chat_history": [],
    }
102
 
103
def extract_metadata(df):
    """Summarise a DataFrame's schema: shape, column names grouped by kind, dtype names."""

    def columns_of(kinds):
        # Names of the columns whose dtype matches ``kinds``, in frame order.
        return df.select_dtypes(include=kinds).columns.tolist()

    dtype_names = {column: dtype.name for column, dtype in df.dtypes.items()}
    return {
        'shape': df.shape,
        'columns': df.columns.tolist(),
        'numeric': columns_of(np.number),
        'categorical': columns_of(['object', 'category']),
        'datetime': columns_of('datetime'),
        'dtypes': dtype_names,
    }
112
 
113
- # --- Module-Specific Handlers ---
 
 
 
 
 
 
 
 
 
114
 
115
def run_helios_engine(df, metadata):
    """Compute the proactive insights shown on the Helios Overview.

    Returns a dict with: per-column missing counts (only columns that have
    any), categorical columns with more than 50 distinct values, per-column
    counts of 1.5*IQR outliers, and a list of suggested ML targets.
    """
    categorical_columns = metadata['categorical']
    numeric_columns = metadata['numeric']

    null_counts = df.isnull().sum()
    missing_data = null_counts[null_counts > 0].sort_values(ascending=False)

    high_cardinality = {}
    for name in categorical_columns:
        distinct = df[name].nunique()
        if distinct > 50:
            high_cardinality[name] = distinct

    outliers = {}
    for name in numeric_columns:
        values = df[name]
        lower_quartile = values.quantile(0.25)
        upper_quartile = values.quantile(0.75)
        spread = upper_quartile - lower_quartile
        below = values < (lower_quartile - 1.5 * spread)
        above = values > (upper_quartile + 1.5 * spread)
        flagged = (below | above).sum()
        if flagged > 0:
            outliers[name] = flagged

    # Binary categoricals look like classification targets; numeric columns
    # with many distinct values look like regression targets.
    ml_suggestions = [
        f"{name} (Classification)" for name in categorical_columns if df[name].nunique() == 2
    ]
    ml_suggestions.extend(
        f"{name} (Regression)" for name in numeric_columns if df[name].nunique() > 20
    )

    return {
        'missing_data': missing_data,
        'high_cardinality': high_cardinality,
        'outliers': outliers,
        'ml_suggestions': ml_suggestions,
    }
137
-
138
def prometheus_run_model(state, target, features, model_name):
    """Train and evaluate a model for the Prometheus Launchpad.

    Args:
        state: Application-state dict; ``state['df_modified']`` is the data.
        target: Name of the target column.
        features: List of feature column names.
        model_name: One of the keys of the model table below.

    Returns:
        ``(fig1, fig2, report)``: a diagnostic figure (ROC curve for binary
        targets, confusion matrix for multi-class targets, residual plot for
        regression), a feature-importance figure, and a Markdown report.
        On invalid input or training failure the figures are ``None`` and
        ``report`` holds the message.
    """
    if not target or not features:
        return None, None, "Select target and features."
    if not state or state.get('df_modified') is None:
        return None, None, "Load a dataset first."

    # A target that is also a feature leaks the answer into the model.
    features = [name for name in features if name != target]
    if not features:
        return None, None, "Select target and features."

    columns = [target] + features
    df = state['df_modified'].dropna(subset=columns).copy()
    if df.empty:
        return None, None, "No rows left after dropping missing values."

    for col in columns:
        if df[col].dtype.name in ['category', 'object']:
            df[col] = LabelEncoder().fit_transform(df[col])

    X, y = df[features], df[target]
    problem_type = "Classification" if y.nunique() <= 10 else "Regression"

    MODELS = {"Classification": {"Random Forest": RandomForestClassifier, "Logistic Regression": LogisticRegression},
              "Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression}}
    if model_name not in MODELS[problem_type]:
        return None, None, "Invalid model for this problem type."

    model_cls = MODELS[problem_type][model_name]
    # LinearRegression has no ``random_state`` parameter, so only seed the
    # estimators that actually declare one.
    seed_kwargs = {"random_state": 42} if "random_state" in model_cls().get_params() else {}
    model = model_cls(**seed_kwargs)

    try:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        if problem_type == "Classification":
            scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
            report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"
            model.fit(X_train, y_train)
            if len(model.classes_) == 2:
                # ROC is only defined for binary targets; name the positive
                # class explicitly so labels other than {0, 1} work too.
                y_prob = model.predict_proba(X_test)[:, 1]
                fpr, tpr, _ = roc_curve(y_test, y_prob, pos_label=model.classes_[1])
                fig1 = go.Figure(data=go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {auc(fpr, tpr):.2f})'))
                fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random')
                fig1.update_layout(title="ROC Curve")
            else:
                # Multi-class target: show a confusion matrix instead.
                matrix = confusion_matrix(y_test, model.predict(X_test), labels=model.classes_)
                tick_labels = [str(label) for label in model.classes_]
                fig1 = px.imshow(matrix, x=tick_labels, y=tick_labels,
                                 labels=dict(x="Predicted", y="Actual", color="Count"),
                                 title="Confusion Matrix")
        else:  # Regression
            scores = cross_val_score(model, X, y, cv=5, scoring='r2')
            report = f"**Cross-Validated R² Score:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"
            model.fit(X_train, y_train)
            preds = model.predict(X_test)
            residuals = y_test - preds
            fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted", labels={'x': 'Predicted', 'y': 'Residuals'})
            fig1.add_hline(y=0, line_dash="dash")
    except Exception as e:
        # Too few rows per class, non-numeric features, etc.: report the
        # problem in the UI instead of failing the event handler.
        return None, None, f"Model training failed: {e}"

    if hasattr(model, 'feature_importances_'):
        fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
        fig2 = px.bar(fi, title="Feature Importance")
    else:
        fig2 = go.Figure().update_layout(title="Feature Importance (Not available)")

    return fig1, fig2, report
 
 
 
 
 
 
 
 
 
 
184
 
185
def athena_respond(user_message, history, state, api_key):
    """Handle one chat turn with the Athena AI co-pilot (generator).

    Yields ``(history, fig, df_result, state)`` tuples: first the model's
    plan, then the result of executing the generated code.

    Args:
        user_message: The user's request.
        history: List of ``(user, bot)`` chat tuples; mutated in place.
        state: Application-state dict; ``state['df_modified']`` is exposed to
            the generated code as ``df``.
        api_key: Gemini API key.
    """
    # This function contains ``yield``, so it is a generator: every exit path
    # must *yield* its outputs. A bare ``return value`` would be discarded and
    # the user would never see the message.
    if not genai:
        history.append((user_message, "Google AI library not installed. Cannot use Athena."))
        yield history, None, None, state
        return
    if not api_key:
        history.append((user_message, "Please enter your Gemini API key to use Athena."))
        yield history, None, None, state
        return

    df = state.get('df_modified') if state else None
    if df is None:
        history.append((user_message, "Please upload a dataset before asking Athena."))
        yield history, None, None, state
        return

    history.append((user_message, None))

    # DataFrame.info() prints and returns None; capture its text via ``buf``
    # so the prompt contains the real summary rather than the string "None".
    info_buffer = io.StringIO()
    df.info(verbose=False, buf=info_buffer)
    df_info = info_buffer.getvalue()

    prompt = f"""
    You are 'Athena', an AI data scientist. Your goal is to help a user by writing and executing Python code on a pandas DataFrame named `df`.

    **DataFrame Info:**
    {df_info}

    **Instructions:**
    1. Analyze the user's request: '{user_message}'.
    2. Formulate a plan (thought).
    3. Write Python code to execute the plan. You can use `pandas as pd`, `numpy as np`, and `plotly.express as px`.
    4. To show a plot, assign it to a variable `fig`.
    5. To show a dataframe, assign it to a variable `df_result`.
    6. Use `print()` for text output.
    7. **NEVER** modify `df` in place.
    8. Respond **ONLY** with a single, valid JSON object with keys "thought" and "code".

    **Your JSON Response:**
    """

    try:
        # Configure the API inside the try so a bad key is reported in chat.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        response_json = json.loads(response.text.strip().replace("```json", "").replace("```", ""))

        thought = response_json.get("thought", "Thinking...")
        code_to_run = response_json.get("code", "print('No code generated.')")

        bot_thinking = f"🧠 **Thinking:** *{thought}*"
        history[-1] = (user_message, bot_thinking)
        yield history, None, None, state

        local_vars = {'df': df, 'px': px, 'pd': pd, 'np': np}
        stdout, fig_result, df_result, error = safe_exec(code_to_run, local_vars)

        bot_response = bot_thinking + "\n\n---\n\n"
        if error: bot_response += f"💥 **Error:**\n```\n{error}\n```"
        if stdout: bot_response += f"📋 **Output:**\n```\n{stdout}\n```"
        if not error and not stdout and not fig_result and not isinstance(df_result, pd.DataFrame):
            bot_response += "✅ Code executed, but produced no direct output."

        history[-1] = (user_message, bot_response)
        state['chat_history'] = history  # Persist chat history
        yield history, fig_result, df_result, state

    except Exception as e:
        error_msg = f"A critical error occurred with the AI model: {e}"
        history[-1] = (user_message, error_msg)
        yield history, None, None, state
245
 
246
- # --- UI Builder ---
247
def build_ui():
    """Construct the Gradio Blocks interface and wire up its events.

    Returns:
        The ``gr.Blocks`` app (not yet launched).
    """
    # NOTE(review): the container nesting below was reconstructed from a
    # listing that had lost its indentation -- confirm against the intended layout.
    with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
        state = gr.State(init_state())

        with gr.Row():
            # Left Sidebar - Command Center
            with gr.Column(scale=1):
                gr.Markdown("# 🦉 Odyssey")
                with gr.Accordion("📂 Project", open=True):
                    project_name_input = gr.Textbox(label="Project Name", value="New_Project")
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                    api_key_input = gr.Textbox(label="🔑 Gemini API Key", type="password", placeholder="Enter key...")
                    with gr.Row():
                        save_btn = gr.Button("Save")
                        load_btn = gr.UploadButton("Load .odyssey")
                    project_status = gr.Markdown()

                # Navigation buttons
                overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
                launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
                copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
                export_btn = gr.Button(f"{ICONS['export']} Export Report", visible=False)

            # Right Panel - Main Workspace
            with gr.Column(scale=4):
                # --- Helios Overview Panel ---
                with gr.Column(visible=True) as overview_panel:
                    gr.Markdown(f"# {ICONS['overview']} Helios Overview")
                    helios_report_md = gr.Markdown("Upload a CSV and provide a project name to begin your Odyssey.")

                # --- Prometheus Launchpad Panel ---
                with gr.Column(visible=False) as launchpad_panel:
                    gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
                    with gr.Row():
                        lp_target = gr.Dropdown(label="🎯 Target")
                        lp_features = gr.Dropdown(label="✨ Features", multiselect=True)
                        lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="🧠 Model")
                    lp_run_btn = gr.Button("🚀 Launch Model Training (with CV)")
                    lp_report_md = gr.Markdown()
                    with gr.Row():
                        lp_fig1 = gr.Plot()
                        lp_fig2 = gr.Plot()

                # --- Athena Co-pilot Panel ---
                with gr.Column(visible=False) as copilot_panel:
                    gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
                    chatbot = gr.Chatbot(height=500, label="Chat History")
                    with gr.Accordion("AI Generated Results", open=True):
                        copilot_fig_output = gr.Plot()
                        copilot_df_output = gr.DataFrame(interactive=False)
                    chat_input = gr.Textbox(label="Your Request", placeholder="e.g., 'What's the correlation between all numeric columns?'")
                    chat_submit = gr.Button("Send", variant="primary")

        # --- Event Handling ---
        panels = [overview_panel, launchpad_panel, copilot_panel]

        def switch_panel(btn_idx):
            # Show only the panel whose position matches the clicked button.
            return [gr.update(visible=i == btn_idx) for i in range(len(panels))]

        overview_btn.click(lambda: switch_panel(0), None, panels)
        launchpad_btn.click(lambda: switch_panel(1), None, panels)
        copilot_btn.click(lambda: switch_panel(2), None, panels)

        def on_upload_or_load(state_data):
            """Refresh the UI after a CSV is primed or a project is loaded."""
            state_data = state_data or init_state()
            insights = state_data.get('insights')
            # A blank state stores ``metadata: None``; ``.get('metadata', {})``
            # would hand that None back, so normalise with ``or``.
            metadata = state_data.get('metadata') or {}

            helios_md = "No data loaded."
            if insights and metadata:
                md = f"## 🔭 Proactive Insights for `{state_data.get('project_name')}`\n"
                md += f"Dataset has **{metadata['shape'][0]} rows** and **{metadata['shape'][1]} columns**.\n\n"
                if suggestions := insights.get('ml_suggestions'):
                    md += "### 🔮 Potential ML Targets\n" + "\n".join(f"- `{s}`" for s in suggestions) + "\n"
                if not insights.get('missing_data', pd.Series(dtype=float)).empty:
                    md += "\n### 💧 Missing Data\nFound missing values in these columns:\n" + insights['missing_data'].to_frame('Missing Count').to_markdown() + "\n"
                helios_md = md

            all_cols = metadata.get('columns', [])
            return {
                state: state_data,
                helios_report_md: helios_md,
                lp_target: gr.update(choices=all_cols),
                lp_features: gr.update(choices=all_cols),
                chatbot: state_data.get('chat_history', [])
            }

        refresh_outputs = [state, helios_report_md, lp_target, lp_features, chatbot]
        file_input.upload(prime_data, [file_input, project_name_input], state).then(
            on_upload_or_load, state, refresh_outputs
        )
        load_btn.upload(load_project, load_btn, state).then(
            on_upload_or_load, state, refresh_outputs
        )
        save_btn.click(save_project, state, project_status)

        lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])

        chat_submit.click(
            athena_respond,
            [chat_input, chatbot, state, api_key_input],
            [chatbot, copilot_fig_output, copilot_df_output, state]
        ).then(lambda: "", outputs=chat_input)

    return demo
350
 
351
- # --- Main Execution ---
352
  if __name__ == "__main__":
353
- app = build_ui()
354
  app.launch(debug=True)
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import plotly.express as px
5
  import plotly.graph_objects as go
6
+ from plotly.subplots import make_subplots
7
+ import io
8
+ import json
9
+ import warnings
10
+ import google.generativeai as genai
11
+ import os
12
  from contextlib import redirect_stdout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # --- Configuration ---
15
  warnings.filterwarnings('ignore')
16
+ CSS = """
17
+ /* --- Phoenix UI Custom CSS --- */
18
+ /* Stat Card Styling */
19
+ .stat-card {
20
+ border-radius: 12px !important;
21
+ padding: 20px !important;
22
+ background: #f7fafc; /* light gray background */
23
+ border: 1px solid #e2e8f0;
24
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
25
+ text-align: center;
26
+ }
27
+ .stat-card-title { font-size: 16px; font-weight: 500; color: #4a5568; margin-bottom: 8px; }
28
+ .stat-card-value { font-size: 32px; font-weight: 700; color: #2d3748; }
29
+
30
+ /* General Layout & Feel */
31
+ .gradio-container { font-family: 'Inter', sans-serif; }
32
+ .gr-button { box-shadow: 0 1px 2px 0 rgba(0,0,0,0.05); }
33
+
34
+ /* Sidebar Styling */
35
+ .sidebar {
36
+ background-color: #f9fafb;
37
+ padding: 15px;
38
+ border-right: 1px solid #e5e7eb;
39
+ min-height: 100vh;
40
+ }
41
+ .sidebar .gr-button {
42
+ width: 100%;
43
+ text-align: left !important;
44
+ background: none !important;
45
+ border: none !important;
46
+ box-shadow: none !important;
47
+ color: #374151 !important;
48
+ font-size: 16px !important;
49
+ padding: 12px 10px !important;
50
+ margin-bottom: 8px !important;
51
+ border-radius: 8px !important;
52
+ }
53
+ .sidebar .gr-button:hover { background-color: #e5e7eb !important; }
54
+ .sidebar .gr-button.selected { background-color: #d1d5db !important; font-weight: 600 !important; }
55
+
56
+ /* AI Co-pilot Styling */
57
+ .code-block { border: 1px solid #e5e7eb; border-radius: 8px; }
58
+ .explanation-block { background-color: #f0f9ff; border-left: 4px solid #3b82f6; padding: 12px; }
59
+ """
60
 
61
  # --- Helper Functions ---
62
def safe_exec(code_string: str, local_vars: dict):
    """Execute a string of Python code and capture what it produced.

    NOTE(security): this is a plain ``exec`` and NOT a sandbox. The snippet
    runs with the full privileges of this process, so it must only be fed
    code the operator is willing to run.

    Args:
        code_string: Python source to execute.
        local_vars: Names made available to the snippet (e.g. ``df``, ``pd``).
            Names the snippet creates or rebinds are written back into it.

    Returns:
        ``(stdout, fig, result_df, error)``. On success ``error`` is ``None``
        and ``fig`` / ``result_df`` are whatever the snippet bound to those
        names (or ``None``). On failure the first three items are ``None``
        and ``error`` is a message string.
    """
    module_globals = globals()
    # One merged namespace on purpose: with separate globals/locals mappings,
    # functions and lambdas defined by the snippet resolve free names in the
    # globals mapping only and cannot see ``df``/``pd`` passed via locals.
    namespace = {**module_globals, **local_vars}
    output_buffer = io.StringIO()
    try:
        with redirect_stdout(output_buffer):
            exec(code_string, namespace)
    except Exception as e:
        return None, None, None, f"Execution Error: {str(e)}"

    # Mirror everything the snippet created or rebound into the caller's dict
    # so the historical "results land in local_vars" contract still holds.
    for name, value in namespace.items():
        if name == "__builtins__":
            continue
        if name not in module_globals or module_globals[name] is not value:
            local_vars[name] = value

    return output_buffer.getvalue(), local_vars.get('fig'), local_vars.get('result_df'), None
75
 
76
+ # --- Core Data Processing & State Management ---
77
def load_and_process_file(file_obj, state_dict):
    """Load a CSV, build the app state, and produce the UI updates for it.

    Every return path yields the same 13 values so the handler always matches
    its outputs list: ``(state, status message, welcome page, cockpit page,
    deep-dive page, co-pilot page, rows stat, columns stat, quality stat,
    datetime-columns stat, and three column-dropdown updates)``.

    Args:
        file_obj: Uploaded file object exposing a ``.name`` path, or ``None``.
        state_dict: The current state; returned unchanged on failure.
    """
    # Number of UI updates that follow (state, status) on the success path.
    # The no-file and error paths previously returned 3 and 10 updates, which
    # cannot both match the same outputs list as the 11 returned on success.
    # NOTE(review): the event wiring is not visible here -- confirm it lists 13 outputs.
    n_ui_updates = 11

    if file_obj is None:
        # Nothing to load: leave every component as it is.
        return (state_dict, "Please upload a file.",
                *[gr.update() for _ in range(n_ui_updates)])
    try:
        df = pd.read_csv(file_obj.name, low_memory=False)
        # Promote text columns that parse cleanly as dates; leave the rest alone.
        for col in df.select_dtypes(include=['object']).columns:
            try:
                df[col] = pd.to_datetime(df[col], errors='raise')
            except (ValueError, TypeError):
                continue

        metadata = extract_dataset_metadata(df)
        state_dict = {
            'df': df,
            'metadata': metadata,
            'filename': os.path.basename(file_obj.name),
            'dashboard_plots': []
        }

        status_msg = f" **{state_dict['filename']}** loaded successfully."

        # Land on the cockpit page and hide the others, including the welcome page.
        cockpit_update = gr.update(visible=True)
        deep_dive_update = gr.update(visible=False)
        copilot_update = gr.update(visible=False)
        welcome_update = gr.update(visible=False)

        # Stat cards
        rows, cols = metadata['shape']
        quality = metadata['data_quality']
        column_choices = metadata['columns']

        return (state_dict, status_msg, welcome_update, cockpit_update, deep_dive_update, copilot_update,
                gr.update(value=f"{rows:,}"), gr.update(value=f"{cols:,}"), gr.update(value=f"{quality}%"),
                gr.update(value=f"{len(metadata['datetime_cols'])}"),
                gr.update(choices=column_choices), gr.update(choices=column_choices), gr.update(choices=column_choices))
    except Exception as e:
        return (state_dict, f"❌ **Error:** {e}",
                *[gr.update() for _ in range(n_ui_updates)])
 
 
 
 
 
115
 
116
def extract_dataset_metadata(df: pd.DataFrame):
    """Summarise a DataFrame for the stat cards and the AI prompts.

    Args:
        df: The loaded dataset.

    Returns:
        A dict with ``shape``, ``columns``, the column names grouped into
        ``numeric_cols`` / ``categorical_cols`` / ``datetime_cols``, the
        dtypes and first rows rendered as strings, and ``data_quality`` (the
        percentage of non-missing cells, rounded to one decimal; 0 for an
        empty frame).
    """
    rows, cols = df.shape
    total_cells = rows * cols
    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    # 'datetimetz' is needed as well: timestamps parsed with a UTC offset get
    # a timezone-aware dtype that plain 'datetime64' does not select.
    datetime_cols = df.select_dtypes(include=['datetime64', 'datetimetz']).columns.tolist()
    if total_cells > 0:
        data_quality = round((df.notna().sum().sum() / total_cells) * 100, 1)
    else:
        data_quality = 0
    return {
        'shape': (rows, cols), 'columns': df.columns.tolist(),
        'numeric_cols': numeric_cols, 'categorical_cols': categorical_cols,
        'datetime_cols': datetime_cols, 'dtypes': df.dtypes.to_string(),
        'head': df.head().to_string(), 'data_quality': data_quality
    }
128
 
129
+ # --- Page Navigation ---
130
def switch_page(page_name):
    """Return visibility updates for the (cockpit, deep-dive, co-pilot) pages.

    Exactly one page is made visible; an unrecognised ``page_name`` falls
    back to the cockpit.
    """
    page_order = ("cockpit", "deep_dive", "co-pilot")
    active_page = page_name if page_name in page_order else "cockpit"
    return tuple(gr.update(visible=(page == active_page)) for page in page_order)
139
 
140
+ # --- Page 1: Data Cockpit ---
141
def get_ai_suggestions(state_dict, api_key):
    """Ask the model for analytical questions worth exploring on this dataset.

    Args:
        state_dict: App state; ``state_dict['metadata']`` describes the data.
        api_key: Gemini API key.

    Returns:
        ``(status, accordion_update)``. The suggestions are rendered as a
        Markdown bullet list in the status output. Components cannot be added
        to an existing layout from a handler's return value (``gr.Accordion``
        has no usable ``children`` argument), so the accordion stays hidden.
    """
    if not api_key: return "Enter your Gemini API key to get suggestions.", gr.update(visible=False)
    if not state_dict: return "Upload data first.", gr.update(visible=False)

    metadata = state_dict['metadata']
    prompt = f"""
    Based on the following dataset metadata, generate 3 to 5 specific, actionable, and interesting analytical questions a user might want to ask. Frame them as questions.
    - **Columns:** {', '.join(metadata['columns'])}
    - **Numeric:** {', '.join(metadata['numeric_cols'])}
    - **Categorical:** {', '.join(metadata['categorical_cols'])}
    - **Datetime:** {', '.join(metadata['datetime_cols'])}

    Return ONLY a JSON list of strings. Example: ["What is the trend of sales over time?", "Which category has the highest average price?"]
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        # Models often wrap JSON in a Markdown code fence; strip it before
        # parsing, as the chat handler does.
        raw_text = response.text.strip().replace("```json", "").replace("```", "")
        suggestions = json.loads(raw_text)
        if not isinstance(suggestions, list) or not suggestions:
            return "The AI did not return any suggestions. Please try again.", gr.update(visible=False)

        bullet_list = "\n".join(f"- {suggestion}" for suggestion in suggestions)
        status = gr.update(visible=True, value=f"### ✨ AI Smart Suggestions\n{bullet_list}")
        return status, gr.update(visible=False)

    except Exception as e:
        return f"Could not generate suggestions: {e}", gr.update(visible=False)
168
+
169
+ # --- Page 2: Deep Dive Dashboard ---
170
def add_plot_to_dashboard(state_dict, x_col, y_col, plot_type):
    """Build a plot and append it to ``state_dict['dashboard_plots']``.

    Args:
        state_dict: App state holding the DataFrame under ``'df'``.
        x_col: Column for the x axis (required).
        y_col: Column for the y axis (used by scatter/box), may be empty.
        plot_type: One of ``'histogram'``, ``'box'``, ``'scatter'``, ``'bar'``.

    Returns:
        ``(state_dict, accordion_update)``.
    """
    if not x_col: return state_dict, gr.update()
    if not state_dict or 'df' not in state_dict:
        # Without this guard the lookup below raised KeyError outside the try.
        gr.Warning("Upload a dataset before adding plots.")
        return state_dict, gr.update()

    df = state_dict['df']
    title = f"{plot_type.capitalize()}: {y_col} by {x_col}" if y_col else f"Distribution of {x_col}"
    fig = None

    try:
        if plot_type == 'histogram': fig = px.histogram(df, x=x_col, title=title)
        elif plot_type == 'box': fig = px.box(df, x=x_col, y=y_col, title=title)
        elif plot_type == 'scatter': fig = px.scatter(df, x=x_col, y=y_col, title=title, trendline="ols")
        elif plot_type == 'bar':
            counts = df[x_col].value_counts().nlargest(20)
            fig = px.bar(counts, x=counts.index, y=counts.values, title=f"Top 20 Categories for {x_col}")
            fig.update_xaxes(title=x_col)
    except Exception as e:
        gr.Warning(f"Plotting Error: {e}")
        return state_dict, gr.update()

    if fig is not None:
        state_dict.setdefault('dashboard_plots', []).append(fig)

    # ``gr.Accordion`` accepts no ``children`` argument, so child components
    # cannot be injected through a handler's return value; passing it either
    # raised (surfacing as a bogus "Plotting Error" after the state had
    # already been mutated) or was silently ignored.
    # NOTE(review): to actually display the stored figures, the layout needs a
    # dynamic renderer (e.g. ``@gr.render`` keyed on the state) -- confirm
    # against the installed Gradio version.
    return state_dict, gr.update(label="Your Dashboard Plots", open=True)
196
+
197
def clear_dashboard(state_dict):
    """Remove every stored plot from the dashboard state.

    Returns:
        ``(state_dict, accordion_update)``.
    """
    if state_dict is None:
        state_dict = {}
    state_dict['dashboard_plots'] = []
    # ``gr.Accordion`` has no ``children`` argument; only its own properties
    # can be updated from a handler.
    return state_dict, gr.update(label="Your Dashboard Plots")
201
 
202
+ # --- Page 3: AI Co-pilot ---
203
def respond_to_chat(user_message, history, state_dict, api_key):
    """Handle one chat turn with the AI co-pilot.

    Sends the question plus dataset metadata to the model, executes the code
    it returns against ``state_dict['df']``, and prepares the response panes.

    Args:
        user_message: The user's question.
        history: List of ``(user, bot)`` chat tuples; mutated in place.
        state_dict: App state with ``'df'`` and ``'metadata'``.
        api_key: Gemini API key.

    Returns:
        ``(history, explanation_update, code_update, plot_update,
        table_update)``; a pane is made visible only when it has content.
    """
    if not api_key:
        history.append((user_message, "I need a Gemini API key to function. Please provide it in the sidebar."))
        return history, *[gr.update(visible=False)] * 4
    if not state_dict:
        history.append((user_message, "Please upload a dataset first."))
        return history, *[gr.update(visible=False)] * 4

    history.append((user_message, None))

    metadata = state_dict['metadata']
    prompt = f"""
    You are 'Phoenix Co-pilot', an expert AI data analyst. Your goal is to help a user analyze a pandas DataFrame named `df`.

    **Instructions:**
    1. Carefully understand the user's question.
    2. Formulate a plan (thought process).
    3. Write Python code to execute that plan.
    4. The code can use pandas (pd), numpy (np), and plotly.express (px).
    5. **For plots, assign the figure to a variable `fig` (e.g., `fig = px.histogram(...)`).**
    6. **For table-like results, assign the final DataFrame to a variable `result_df` (e.g., `result_df = df.describe()`).**
    7. Do not modify the original `df`. Use `df.copy()` if needed.
    8. Provide a brief, user-friendly explanation of the result.
    9. Respond **ONLY** with a single, raw JSON object with keys: "thought", "code", "explanation".

    **DataFrame Metadata:**
    - Columns and dtypes: {metadata['dtypes']}
    - First 5 rows: {metadata['head']}

    **User Question:** "{user_message}"

    **Your JSON Response:**
    """

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)

        response_text = response.text.strip().replace("```json", "").replace("```", "")
        response_json = json.loads(response_text)

        thought = response_json.get("thought", "Thinking...")
        code_to_run = response_json.get("code", "")
        explanation = response_json.get("explanation", "Here is the result.")

        stdout, fig_result, df_result, error = safe_exec(code_to_run, {'df': state_dict['df'], 'px': px, 'pd': pd, 'np': np})

        bot_message = f"🤔 **Thought:** *{thought}*"
        history[-1] = (user_message, bot_message)

        # Generated code very often ends in a Series (value_counts, mean per
        # group, ...). Turn it into a frame so the table pane can render it.
        if isinstance(df_result, pd.Series):
            df_result = df_result.reset_index()

        # Build the explanation text; an execution error replaces everything else.
        if error:
            explanation_text = f"**Phoenix Co-pilot:** I encountered an error. Here's the details:\n\n`{error}`"
        else:
            explanation_text = f"**Phoenix Co-pilot:** {explanation}" if explanation else ""
            if stdout:
                explanation_text += f"\n\n**Console Output:**\n```\n{stdout}\n```"

        def pane(value, has_content):
            # Show a pane only when there is something to put in it.
            if has_content:
                return gr.update(visible=True, value=value)
            return gr.update(visible=False, value=None)

        output_updates = [
            pane(explanation_text, bool(explanation_text)),
            pane(code_to_run, bool(code_to_run)),
            # Test against None rather than truthiness: the bound object need
            # not define a meaningful boolean value.
            pane(fig_result, fig_result is not None),
            pane(df_result, df_result is not None),
        ]

        return history, *output_updates

    except Exception as e:
        error_msg = f"A critical error occurred: {e}. The AI may have returned an invalid response. Please try rephrasing your question."
        history[-1] = (user_message, error_msg)
        return history, *[gr.update(visible=False)] * 4
276
 
277
+ # --- Gradio UI Definition ---
278
+ def create_gradio_interface():
279
+ with gr.Blocks(theme=gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue"), css=CSS, title="Phoenix AI Data Explorer") as demo:
280
+ global_state = gr.State({})
 
281
 
282
  with gr.Row():
283
+ # --- Sidebar ---
284
+ with gr.Column(scale=1, elem_classes="sidebar"):
285
+ gr.Markdown("# 🚀 Phoenix UI")
286
+ gr.Markdown("AI Data Explorer")
 
 
 
 
 
 
 
287
 
288
  # Navigation buttons
289
+ cockpit_btn = gr.Button("📊 Data Cockpit", elem_classes="selected")
290
+ deep_dive_btn = gr.Button("🔍 Deep Dive Builder")
291
+ copilot_btn = gr.Button("🤖 AI Co-pilot")
292
+
293
+ gr.Markdown("---")
294
+ file_input = gr.File(label="📁 Upload New CSV", file_types=[".csv"])
295
+ status_output = gr.Markdown("Status: Awaiting data...")
296
+ gr.Markdown("---")
297
+ api_key_input = gr.Textbox(label="🔑 Gemini API Key", type="password", placeholder="Enter key here...")
298
+ suggestion_btn = gr.Button("Get Smart Suggestions", variant="secondary")
299
 
300
+ # --- Main Content Area ---
301
  with gr.Column(scale=4):
 
 
 
 
302
 
303
+ # Welcome Page (Visible initially)
304
+ with gr.Column(visible=True) as welcome_page:
305
+ gr.Markdown("# Welcome to the AI Data Explorer (Phoenix UI)", elem_id="welcome-header")
306
+ gr.Markdown("Please **upload a CSV file** and **enter your Gemini API key** in the sidebar to begin.")
307
+ gr.Image(value="https://i.imgur.com/gY5wSjV.png", label="Workflow", show_label=False, show_download_button=False, container=False) # Placeholder image
308
+
309
+ # Page 1: Data Cockpit (Hidden initially)
310
+ with gr.Column(visible=False) as cockpit_page:
311
+ gr.Markdown("## 📊 Data Cockpit")
312
  with gr.Row():
313
+ with gr.Column(elem_classes="stat-card"):
314
+ gr.Markdown("<div class='stat-card-title'>Rows</div>", elem_classes="stat-card-content")
315
+ rows_stat = gr.Textbox("0", show_label=False, elem_classes="stat-card-value")
316
+ with gr.Column(elem_classes="stat-card"):
317
+ gr.Markdown("<div class='stat-card-title'>Columns</div>", elem_classes="stat-card-content")
318
+ cols_stat = gr.Textbox("0", show_label=False, elem_classes="stat-card-value")
319
+ with gr.Column(elem_classes="stat-card"):
320
+ gr.Markdown("<div class='stat-card-title'>Data Quality</div>", elem_classes="stat-card-content")
321
+ quality_stat = gr.Textbox("0%", show_label=False, elem_classes="stat-card-value")
322
+ with gr.Column(elem_classes="stat-card"):
323
+ gr.Markdown("<div class='stat-card-title'>Date/Time Cols</div>", elem_classes="stat-card-content")
324
+ time_cols_stat = gr.Textbox("0", show_label=False, elem_classes="stat-card-value")
325
+
326
+ suggestion_status = gr.Markdown(visible=True)
327
+ suggestion_accordion = gr.Accordion(label="✨ AI Smart Suggestions", open=False, visible=False)
328
+
329
+ # Page 2: Deep Dive Dashboard Builder (Hidden initially)
330
+ with gr.Column(visible=False) as deep_dive_page:
331
+ gr.Markdown("## 🔍 Deep Dive Dashboard Builder")
332
+ gr.Markdown("Create a custom dashboard by adding multiple plots to investigate your data.")
333
  with gr.Row():
334
+ plot_type_dd = gr.Dropdown(['histogram', 'bar', 'scatter', 'box'], label="Plot Type", value='histogram')
335
+ x_col_dd = gr.Dropdown([], label="X-Axis / Column")
336
+ y_col_dd = gr.Dropdown([], label="Y-Axis (for Scatter/Box)")
337
+ with gr.Row():
338
+ add_plot_btn = gr.Button("Add to Dashboard", variant="primary")
339
+ clear_plots_btn = gr.Button("Clear Dashboard")
340
+ dashboard_accordion = gr.Accordion(label="Your Dashboard Plots", open=True)
341
+
342
+ # Page 3: AI Co-pilot (Hidden initially)
343
+ with gr.Column(visible=False) as copilot_page:
344
+ gr.Markdown("## 🤖 AI Co-pilot")
345
+ gr.Markdown("Ask complex questions in natural language. The Co-pilot will write and execute code to find the answer.")
346
+ chatbot = gr.Chatbot(height=400, label="Conversation with Co-pilot")
347
+
348
+ # AI's multi-modal response area
349
+ with gr.Accordion("Co-pilot's Response Details", open=True):
350
+ copilot_explanation = gr.Markdown(visible=False, elem_classes="explanation-block")
351
+ copilot_code = gr.Code(language="python", visible=False, label="Executed Python Code", elem_classes="code-block")
352
+ copilot_plot = gr.Plot(visible=False, label="Generated Visualization")
353
+ copilot_table = gr.Dataframe(visible=False, label="Generated Table", wrap=True)
354
+
355
+ with gr.Row():
356
+ chat_input = gr.Textbox(label="Your Question", placeholder="e.g., 'What is the correlation between age and salary?'", scale=4)
357
+ chat_submit_btn = gr.Button("Submit", variant="primary")
358
+
359
+ # --- Event Handlers ---
360
+ # Page Navigation
361
+ pages = [cockpit_page, deep_dive_page, copilot_page]
362
+ nav_buttons = [cockpit_btn, deep_dive_btn, copilot_btn]
363
 
364
+ for i, btn in enumerate(nav_buttons):
365
+ btn.click(lambda i=i: (gr.update(visible=i==0), gr.update(visible=i==1), gr.update(visible=i==2)),
366
+ outputs=pages).then(
367
+ lambda: [gr.update(elem_classes="selected" if j==i else "") for j in range(len(nav_buttons))],
368
+ outputs=nav_buttons)
369
+
370
+ # File Upload
371
+ file_input.upload(
372
+ fn=load_and_process_file,
373
+ inputs=[file_input, global_state],
374
+ outputs=[global_state, status_output, welcome_page, cockpit_page, deep_dive_page, copilot_page,
375
+ rows_stat, cols_stat, quality_stat, time_cols_stat,
376
+ x_col_dd, y_col_dd, copilot_input_col_ref_for_dynamic_update] # This last one is a dummy to trigger updates
377
+ ).then(
378
+ fn=lambda: (gr.update(elem_classes="selected"), gr.update(elem_classes=""), gr.update(elem_classes="")),
379
+ outputs=nav_buttons
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  )
 
381
 
382
+ # Suggestions Button
383
+ suggestion_btn.click(
384
+ get_ai_suggestions,
385
+ [global_state, api_key_input],
386
+ [suggestion_status, suggestion_accordion]
387
+ ).then(
388
+ fn=lambda: [gr.Button.update(visible=True) for _ in range(5)], # Assumes max 5 suggestions for demo
389
+ outputs=[b for b in suggestion_accordion.children] if isinstance(suggestion_accordion, gr.Accordion) and suggestion_accordion.children else []
390
+ )
391
+ # Handle suggestion button clicks to populate chat
392
+ if isinstance(suggestion_accordion, gr.Accordion):
393
+ for button in suggestion_accordion.children:
394
+ button.click(
395
+ fn=lambda q=button.value: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), q),
396
+ outputs=[cockpit_page, deep_dive_page, copilot_page, chat_input]
397
+ ).then(
398
+ fn=lambda: (gr.update(elem_classes=""), gr.update(elem_classes=""), gr.update(elem_classes="selected")),
399
+ outputs=nav_buttons
400
+ )
401
+
402
+
403
+ # Dashboard Builder
404
+ add_plot_btn.click(add_plot_to_dashboard, [global_state, x_col_dd, y_col_dd, plot_type_dd], [global_state, dashboard_accordion])
405
+ clear_plots_btn.click(clear_dashboard, [global_state], [global_state, dashboard_accordion])
406
+
407
+ # AI Co-pilot Chat
408
+ copilot_input_col_ref_for_dynamic_update = x_col_dd # Dummy placeholder for dynamic updates
409
+ chat_submit_btn.click(
410
+ respond_to_chat,
411
+ [chat_input, chatbot, global_state, api_key_input],
412
+ [chatbot, copilot_explanation, copilot_code, copilot_plot, copilot_table]
413
+ ).then(lambda: "", outputs=[chat_input])
414
+ chat_input.submit(
415
+ respond_to_chat,
416
+ [chat_input, chatbot, global_state, api_key_input],
417
+ [chatbot, copilot_explanation, copilot_code, copilot_plot, copilot_table]
418
+ ).then(lambda: "", outputs=[chat_input])
419
 
420
+ return demo
421
 
 
422
  if __name__ == "__main__":
423
+ app = create_gradio_interface()
424
  app.launch(debug=True)