Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
# Odyssey - The AI Data Science Workspace
|
2 |
-
# A
|
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
7 |
import plotly.express as px
|
8 |
import plotly.graph_objects as go
|
9 |
-
import io, os, json,
|
10 |
from contextlib import redirect_stdout
|
11 |
from datetime import datetime
|
12 |
|
@@ -18,13 +19,20 @@ from sklearn.metrics import roc_curve, auc, confusion_matrix, r2_score, mean_squ
|
|
18 |
from sklearn.preprocessing import LabelEncoder
|
19 |
from sklearn.impute import KNNImputer
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
# --- Configuration ---
|
22 |
warnings.filterwarnings('ignore')
|
23 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
24 |
|
25 |
# --- UI Theme & Icons ---
|
26 |
THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
|
27 |
-
body_background_fill="radial-gradient(circle, rgba(
|
28 |
block_label_background_fill="rgba(255,255,255,0.05)",
|
29 |
block_background_fill="rgba(255,255,255,0.05)",
|
30 |
button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
|
@@ -33,57 +41,49 @@ THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral
|
|
33 |
)
|
34 |
ICONS = {"overview": "๐ญ", "medic": "๐งช", "launchpad": "๐", "copilot": "๐ก", "export": "๐"}
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# --- Core State & Project Management ---
|
37 |
def init_state():
|
38 |
-
"""Initializes a blank global state."""
|
39 |
return {
|
40 |
-
"project_name": None,
|
41 |
-
"
|
42 |
-
"df_modified": None,
|
43 |
-
"metadata": None,
|
44 |
-
"insights": None,
|
45 |
-
"chat_history": [],
|
46 |
-
"dynamic_dashboards": {}
|
47 |
}
|
48 |
|
49 |
def save_project(state):
|
50 |
-
"""Saves the entire application state to a .
|
51 |
if not state or not state.get("project_name"):
|
52 |
return gr.update(value="Project needs a name to save.", interactive=True)
|
53 |
|
54 |
filename = f"{state['project_name']}.odyssey"
|
55 |
-
# Convert dataframes to pickle strings for serialization
|
56 |
-
state_to_save = state.copy()
|
57 |
-
if state_to_save['df_original'] is not None:
|
58 |
-
state_to_save['df_original'] = state_to_save['df_original'].to_pickle()
|
59 |
-
if state_to_save['df_modified'] is not None:
|
60 |
-
state_to_save['df_modified'] = state_to_save['df_modified'].to_pickle()
|
61 |
-
|
62 |
with open(filename, "wb") as f:
|
63 |
-
pickle.dump(
|
64 |
-
|
65 |
-
return gr.update(value=f"Project saved to {filename}", interactive=True)
|
66 |
|
67 |
def load_project(file_obj):
|
68 |
"""Loads a .odyssey file into the application state."""
|
69 |
if not file_obj: return init_state()
|
70 |
with open(file_obj.name, "rb") as f:
|
71 |
-
|
72 |
-
|
73 |
-
# Unpickle dataframes
|
74 |
-
if loaded_state['df_original'] is not None:
|
75 |
-
loaded_state['df_original'] = pd.read_pickle(io.BytesIO(loaded_state['df_original']))
|
76 |
-
if loaded_state['df_modified'] is not None:
|
77 |
-
loaded_state['df_modified'] = pd.read_pickle(io.BytesIO(loaded_state['df_modified']))
|
78 |
-
|
79 |
-
return loaded_state
|
80 |
|
81 |
def prime_data(file_obj, project_name):
|
82 |
"""Main function to load a new CSV, analyze it, and set the initial state."""
|
83 |
if not file_obj: return init_state()
|
84 |
df = pd.read_csv(file_obj.name)
|
85 |
|
86 |
-
# Smart type conversion
|
87 |
for col in df.select_dtypes(include=['object']).columns:
|
88 |
try:
|
89 |
df[col] = pd.to_datetime(df[col], errors='raise')
|
@@ -96,35 +96,29 @@ def prime_data(file_obj, project_name):
|
|
96 |
|
97 |
return {
|
98 |
"project_name": project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}",
|
99 |
-
"df_original": df,
|
100 |
-
"
|
101 |
-
"metadata": metadata,
|
102 |
-
"insights": insights,
|
103 |
-
"chat_history": [],
|
104 |
-
"dynamic_dashboards": {}
|
105 |
}
|
106 |
|
107 |
def extract_metadata(df):
|
108 |
"""Utility to get schema and column types."""
|
109 |
return {
|
110 |
-
'shape': df.shape,
|
111 |
-
'columns': df.columns.tolist(),
|
112 |
'numeric': df.select_dtypes(include=np.number).columns.tolist(),
|
113 |
'categorical': df.select_dtypes(include=['object', 'category']).columns.tolist(),
|
114 |
'datetime': df.select_dtypes(include='datetime').columns.tolist(),
|
115 |
'dtypes': df.dtypes.apply(lambda x: x.name).to_dict()
|
116 |
}
|
117 |
|
118 |
-
# ---
|
|
|
119 |
def run_helios_engine(df, metadata):
|
120 |
-
"""The proactive analysis engine."""
|
121 |
insights = {}
|
122 |
-
# Missing Data
|
123 |
missing = df.isnull().sum()
|
124 |
insights['missing_data'] = missing[missing > 0].sort_values(ascending=False)
|
125 |
-
# High Cardinality
|
126 |
insights['high_cardinality'] = {c: df[c].nunique() for c in metadata['categorical'] if df[c].nunique() > 50}
|
127 |
-
|
128 |
outliers = {}
|
129 |
for col in metadata['numeric']:
|
130 |
Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
|
@@ -132,7 +126,7 @@ def run_helios_engine(df, metadata):
|
|
132 |
count = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
|
133 |
if count > 0: outliers[col] = count
|
134 |
insights['outliers'] = outliers
|
135 |
-
|
136 |
suggestions = []
|
137 |
for col in metadata['categorical']:
|
138 |
if df[col].nunique() == 2: suggestions.append(f"{col} (Classification)")
|
@@ -141,57 +135,21 @@ def run_helios_engine(df, metadata):
|
|
141 |
insights['ml_suggestions'] = suggestions
|
142 |
return insights
|
143 |
|
144 |
-
# --- Asclepius Data Lab Handlers ---
|
145 |
-
def medic_preview_imputation(state, col, num_method, cat_method):
|
146 |
-
if not col or col not in state['df_modified'].columns: return None
|
147 |
-
df_mod = state['df_modified'].copy()
|
148 |
-
|
149 |
-
if col in state['metadata']['numeric']:
|
150 |
-
if num_method == 'KNN':
|
151 |
-
imputer = KNNImputer(n_neighbors=5)
|
152 |
-
df_mod[col] = imputer.fit_transform(df_mod[[col]])
|
153 |
-
else:
|
154 |
-
value = df_mod[col].mean() if num_method == 'mean' else df_mod[col].median()
|
155 |
-
df_mod[col].fillna(value, inplace=True)
|
156 |
-
|
157 |
-
fig = go.Figure()
|
158 |
-
fig.add_trace(go.Histogram(x=state['df_original'][col], name='Original', opacity=0.7))
|
159 |
-
fig.add_trace(go.Histogram(x=df_mod[col], name='Imputed', opacity=0.7))
|
160 |
-
fig.update_layout(barmode='overlay', title_text=f"Distribution for '{col}'", legend_title_text='Dataset')
|
161 |
-
return fig
|
162 |
-
|
163 |
-
elif col in state['metadata']['categorical']:
|
164 |
-
if cat_method == "Create 'Missing' Category":
|
165 |
-
df_mod[col] = df_mod[col].cat.add_categories("Missing").fillna("Missing") if hasattr(df_mod[col], 'cat') else df_mod[col].fillna("Missing")
|
166 |
-
else: # Mode
|
167 |
-
df_mod[col].fillna(df_mod[col].mode()[0], inplace=True)
|
168 |
-
|
169 |
-
fig = go.Figure()
|
170 |
-
fig.add_trace(go.Bar(x=state['df_original'][col].value_counts().index, y=state['df_original'][col].value_counts().values, name='Original'))
|
171 |
-
fig.add_trace(go.Bar(x=df_mod[col].value_counts().index, y=df_mod[col].value_counts().values, name='Imputed'))
|
172 |
-
return fig
|
173 |
-
return None
|
174 |
-
|
175 |
-
# --- Prometheus Launchpad Handlers ---
|
176 |
def prometheus_run_model(state, target, features, model_name):
|
|
|
177 |
if not target or not features: return None, None, "Select target and features."
|
178 |
df = state['df_modified'].copy()
|
179 |
df.dropna(subset=[target] + features, inplace=True)
|
180 |
|
181 |
-
le_map = {}
|
182 |
for col in [target] + features:
|
183 |
if df[col].dtype.name in ['category', 'object']:
|
184 |
-
|
185 |
-
df[col] = le.fit_transform(df[col])
|
186 |
-
le_map[col] = le
|
187 |
|
188 |
X, y = df[features], df[target]
|
189 |
problem_type = "Classification" if y.nunique() <= 10 else "Regression"
|
190 |
|
191 |
-
MODELS = {
|
192 |
-
|
193 |
-
"Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression}
|
194 |
-
}
|
195 |
if model_name not in MODELS[problem_type]: return None, None, "Invalid model for this problem type."
|
196 |
|
197 |
model = MODELS[problem_type][model_name](random_state=42)
|
@@ -199,63 +157,95 @@ def prometheus_run_model(state, target, features, model_name):
|
|
199 |
if problem_type == "Classification":
|
200 |
scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
|
201 |
report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
|
202 |
-
|
203 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
204 |
model.fit(X_train, y_train)
|
205 |
-
|
206 |
-
# ROC Curve
|
207 |
y_prob = model.predict_proba(X_test)[:, 1]
|
208 |
fpr, tpr, _ = roc_curve(y_test, y_prob)
|
209 |
-
|
210 |
-
fig1
|
211 |
-
fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random Chance')
|
212 |
fig1.update_layout(title="ROC Curve")
|
213 |
-
|
214 |
-
# Feature Importance
|
215 |
-
if hasattr(model, 'feature_importances_'):
|
216 |
-
fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
|
217 |
-
fig2 = px.bar(fi, title="Feature Importance")
|
218 |
-
else: fig2 = go.Figure().update_layout(title="Feature Importance (Not available for this model)")
|
219 |
-
|
220 |
-
return fig1, fig2, report
|
221 |
else: # Regression
|
222 |
scores = cross_val_score(model, X, y, cv=5, scoring='r2')
|
223 |
report = f"**Cross-Validated Rยฒ Score:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
|
224 |
-
|
225 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
226 |
model.fit(X_train, y_train)
|
227 |
preds = model.predict(X_test)
|
228 |
-
|
229 |
-
# Residuals Plot
|
230 |
residuals = y_test - preds
|
231 |
-
fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted
|
232 |
fig1.add_hline(y=0, line_dash="dash")
|
233 |
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
|
242 |
-
# --- Athena Co-pilot Handlers ---
|
243 |
def athena_respond(user_message, history, state, api_key):
|
244 |
-
|
245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
|
257 |
-
# --- UI Builder
|
258 |
def build_ui():
|
|
|
259 |
with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
|
260 |
state = gr.State(init_state())
|
261 |
|
@@ -263,10 +253,10 @@ def build_ui():
|
|
263 |
# Left Sidebar - Command Center
|
264 |
with gr.Column(scale=1):
|
265 |
gr.Markdown("# ๐ฆ Odyssey")
|
266 |
-
|
267 |
with gr.Accordion("๐ Project", open=True):
|
268 |
project_name_input = gr.Textbox(label="Project Name", value="New_Project")
|
269 |
file_input = gr.File(label="Upload CSV", file_types=[".csv"])
|
|
|
270 |
with gr.Row():
|
271 |
save_btn = gr.Button("Save")
|
272 |
load_btn = gr.UploadButton("Load .odyssey")
|
@@ -274,44 +264,24 @@ def build_ui():
|
|
274 |
|
275 |
# Navigation buttons
|
276 |
overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
|
277 |
-
medic_btn = gr.Button(f"{ICONS['medic']} Asclepius Data Lab")
|
278 |
launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
|
279 |
copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
|
280 |
-
export_btn = gr.Button(f"{ICONS['export']} Export Report")
|
281 |
-
|
282 |
-
# Global Info
|
283 |
-
with gr.Accordion("Global Info", open=False):
|
284 |
-
file_info_md = gr.Markdown("No file loaded.")
|
285 |
|
286 |
# Right Panel - Main Workspace
|
287 |
with gr.Column(scale=4):
|
288 |
# --- Helios Overview Panel ---
|
289 |
with gr.Column(visible=True) as overview_panel:
|
290 |
gr.Markdown(f"# {ICONS['overview']} Helios Overview")
|
291 |
-
gr.Markdown("
|
292 |
-
# Interactive dashboard components would go here
|
293 |
-
helios_report_md = gr.Markdown("Upload data to begin analysis.")
|
294 |
|
295 |
-
# --- Asclepius Data Lab Panel ---
|
296 |
-
with gr.Column(visible=False) as medic_panel:
|
297 |
-
gr.Markdown(f"# {ICONS['medic']} Asclepius Data Lab")
|
298 |
-
gr.Markdown("Interactively clean and prepare your data.")
|
299 |
-
# UI components for Data Medic
|
300 |
-
medic_col_select = gr.Dropdown(label="Select Column to Clean")
|
301 |
-
with gr.Row():
|
302 |
-
medic_num_method = gr.Radio(['mean', 'median', 'KNN'], label="Numeric Imputation", value='mean')
|
303 |
-
medic_cat_method = gr.Radio(['mode', "Create 'Missing' Category"], label="Categorical Imputation", value='mode')
|
304 |
-
medic_preview_plot = gr.Plot()
|
305 |
-
medic_apply_btn = gr.Button("Apply Changes to Session")
|
306 |
-
|
307 |
# --- Prometheus Launchpad Panel ---
|
308 |
with gr.Column(visible=False) as launchpad_panel:
|
309 |
gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
|
310 |
-
gr.Markdown("Train, evaluate, and understand predictive models.")
|
311 |
-
# UI components for Launchpad
|
312 |
with gr.Row():
|
313 |
lp_target = gr.Dropdown(label="๐ฏ Target")
|
314 |
-
|
|
|
315 |
lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="๐ง Model")
|
316 |
lp_run_btn = gr.Button("๐ Launch Model Training (with CV)")
|
317 |
lp_report_md = gr.Markdown()
|
@@ -322,54 +292,59 @@ def build_ui():
|
|
322 |
# --- Athena Co-pilot Panel ---
|
323 |
with gr.Column(visible=False) as copilot_panel:
|
324 |
gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
|
325 |
-
gr.
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
chat_input = gr.Textbox(label="Your Request")
|
331 |
chat_submit = gr.Button("Send", variant="primary")
|
332 |
|
333 |
# --- Event Handling ---
|
334 |
-
|
335 |
-
# Panel Navigation
|
336 |
-
panels = [overview_panel, medic_panel, launchpad_panel, copilot_panel]
|
337 |
def switch_panel(btn_idx):
|
338 |
return [gr.update(visible=i == btn_idx) for i in range(len(panels))]
|
339 |
|
340 |
overview_btn.click(lambda: switch_panel(0), None, panels)
|
341 |
-
|
342 |
-
|
343 |
-
copilot_btn.click(lambda: switch_panel(3), None, panels)
|
344 |
|
345 |
-
|
346 |
-
|
347 |
-
new_state = prime_data(file, name)
|
348 |
-
# Update all UI components based on the new state
|
349 |
helios_md = "No data loaded."
|
350 |
-
if
|
351 |
-
|
352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
|
|
|
|
360 |
|
361 |
-
file_input.upload(
|
362 |
-
|
363 |
-
|
|
|
|
|
|
|
364 |
save_btn.click(save_project, state, project_status)
|
365 |
|
366 |
-
# Asclepius Live Preview
|
367 |
-
medic_col_select.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
|
368 |
-
medic_num_method.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
|
369 |
-
medic_cat_method.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
|
370 |
-
|
371 |
-
# Prometheus Model Training
|
372 |
lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
|
374 |
return demo
|
375 |
|
|
|
1 |
# Odyssey - The AI Data Science Workspace
|
2 |
+
# A state-of-the-art, AI-native analytic environment.
|
3 |
+
# This script is a complete, self-contained Gradio application.
|
4 |
|
5 |
import gradio as gr
|
6 |
import pandas as pd
|
7 |
import numpy as np
|
8 |
import plotly.express as px
|
9 |
import plotly.graph_objects as go
|
10 |
+
import io, os, json, pickle, logging, warnings, uuid
|
11 |
from contextlib import redirect_stdout
|
12 |
from datetime import datetime
|
13 |
|
|
|
19 |
from sklearn.preprocessing import LabelEncoder
|
20 |
from sklearn.impute import KNNImputer
|
21 |
|
22 |
+
# Optional: For AI features
|
23 |
+
try:
|
24 |
+
import google.generativeai as genai
|
25 |
+
except ImportError:
|
26 |
+
print("Warning: 'google-generativeai' not found. AI features will be disabled.")
|
27 |
+
genai = None
|
28 |
+
|
29 |
# --- Configuration ---
|
30 |
# Suppress every warning category for the whole process (this also hides
# warnings raised by imported libraries).
warnings.filterwarnings('ignore')
# Configure the root logger: INFO and above, rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
32 |
|
33 |
# --- UI Theme & Icons ---
|
34 |
THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
|
35 |
+
body_background_fill="radial-gradient(circle, rgba(10,20,50,1) 0%, rgba(0,0,10,1) 100%);",
|
36 |
block_label_background_fill="rgba(255,255,255,0.05)",
|
37 |
block_background_fill="rgba(255,255,255,0.05)",
|
38 |
button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
|
|
|
41 |
)
|
42 |
ICONS = {"overview": "๐ญ", "medic": "๐งช", "launchpad": "๐", "copilot": "๐ก", "export": "๐"}
|
43 |
|
44 |
+
# --- Helper Functions ---
|
45 |
+
def safe_exec(code_string: str, local_vars: dict) -> tuple:
    """Execute a string of Python code and capture what it prints.

    SECURITY: this is not a sandbox. The code runs via ``exec`` with this
    module's globals and full builtins available, so it can do anything the
    host process can. Only stdout is captured and only ``Exception``
    subclasses are turned into an error message.

    Args:
        code_string: Python source to run.
        local_vars: Names made available to the code. Names the code binds or
            rebinds are written back into this dict.

    Returns:
        tuple: ``(stdout, fig, df_result, error)``. On success ``error`` is
        ``None``, ``stdout`` is the captured text, and ``fig`` /
        ``df_result`` are whatever the code assigned to those names (``None``
        if unassigned). On failure the first three items are ``None`` and
        ``error`` is ``"Execution Error: <message>"``.
    """
    module_globals = globals()
    # Run in ONE merged namespace. With separate globals/locals dicts, exec
    # behaves like a class body: functions defined by the snippet resolve free
    # names in globals only and cannot see the supplied variables.
    namespace = {**module_globals, **local_vars}
    output_buffer = io.StringIO()
    try:
        with redirect_stdout(output_buffer):
            exec(code_string, namespace)
    except Exception as e:
        return None, None, None, f"Execution Error: {str(e)}"

    # Mirror what the snippet bound or rebound back into the caller's dict,
    # leaving untouched module globals out of it.
    missing = object()
    for name, value in namespace.items():
        if name == "__builtins__":
            continue
        if module_globals.get(name, missing) is not value:
            local_vars[name] = value

    return (
        output_buffer.getvalue(),
        local_vars.get('fig'),
        local_vars.get('df_result'),
        None,
    )
|
57 |
+
|
58 |
# --- Core State & Project Management ---
|
59 |
def init_state():
    """Build the blank application state dictionary.

    Returns:
        dict: ``project_name``, ``df_original``, ``df_modified``,
        ``metadata`` and ``insights`` set to ``None``, plus an empty
        ``chat_history`` list that is created anew on every call.
    """
    blank = dict.fromkeys(
        ("project_name", "df_original", "df_modified", "metadata", "insights")
    )
    blank["chat_history"] = []
    return blank
|
65 |
|
66 |
def save_project(state):
    """Saves the entire application state to a .odyssey file.

    The state dictionary is pickled as-is into ``<project name>.odyssey``,
    opened relative to the current working directory.

    Args:
        state: The application state dictionary; it must carry a non-empty
            ``"project_name"``.

    Returns:
        A ``gr.update`` whose value is the status message: success, missing
        name, or the reason the save failed.
    """
    if not state or not state.get("project_name"):
        return gr.update(value="Project needs a name to save.", interactive=True)

    # The project name is free text, so replace anything that is not a plain
    # filename character. This keeps path separators from steering the write
    # outside the working directory.
    raw_name = str(state["project_name"])
    safe_name = "".join(
        ch if ch.isalnum() or ch in "-_. " else "_" for ch in raw_name
    ).strip(" .")
    if not safe_name:
        return gr.update(value="Project needs a name to save.", interactive=True)

    filename = f"{safe_name}.odyssey"
    try:
        with open(filename, "wb") as f:
            pickle.dump(state, f)
    except (OSError, pickle.PicklingError) as exc:
        logging.exception("Failed to save project to %s", filename)
        return gr.update(value=f"Could not save project: {exc}", interactive=True)

    # NOTE(review): the leading glyph is mis-encoded and split across a line
    # break in the source; presumably a check-mark emoji - confirm.
    return gr.update(value=f"✅ Project saved to {filename}", interactive=True)
|
|
|
75 |
|
76 |
def load_project(file_obj):
    """Loads a .odyssey file into the application state.

    Args:
        file_obj: Uploaded-file handle exposing the on-disk path as ``.name``,
            or the path itself. A falsy value yields a blank state.

    Returns:
        dict: The unpickled state, with any key the file lacks filled in from
        ``init_state()``.

    Raises:
        ValueError: If the file unpickles to something other than a dict.
        OSError, pickle.UnpicklingError, EOFError: If the file cannot be read
            or is not a valid pickle (propagated unchanged).
    """
    if not file_obj:
        return init_state()

    # Accept both an object with a ``.name`` path attribute and a plain path.
    path = getattr(file_obj, "name", file_obj)
    with open(path, "rb") as f:
        # SECURITY: unpickling runs arbitrary code embedded in the file. Only
        # load project files from sources you trust; a data-only format would
        # be needed to accept untrusted uploads safely.
        loaded = pickle.load(f)

    if not isinstance(loaded, dict):
        raise ValueError(
            f"{path!r} is not a valid Odyssey project (expected a state dictionary)."
        )

    # Start from the blank state so files written with fewer keys still
    # provide every key the rest of the app reads.
    return {**init_state(), **loaded}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
def prime_data(file_obj, project_name):
|
83 |
"""Main function to load a new CSV, analyze it, and set the initial state."""
|
84 |
if not file_obj: return init_state()
|
85 |
df = pd.read_csv(file_obj.name)
|
86 |
|
|
|
87 |
for col in df.select_dtypes(include=['object']).columns:
|
88 |
try:
|
89 |
df[col] = pd.to_datetime(df[col], errors='raise')
|
|
|
96 |
|
97 |
return {
|
98 |
"project_name": project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}",
|
99 |
+
"df_original": df, "df_modified": df.copy(), "metadata": metadata,
|
100 |
+
"insights": insights, "chat_history": []
|
|
|
|
|
|
|
|
|
101 |
}
|
102 |
|
103 |
def extract_metadata(df):
    """Utility to get schema and column types.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        dict: ``shape`` (rows, columns), ``columns`` (all column names),
        ``numeric`` / ``categorical`` / ``datetime`` (column names grouped by
        dtype family) and ``dtypes`` (column name -> dtype name).
    """
    return {
        'shape': df.shape,
        'columns': df.columns.tolist(),
        'numeric': df.select_dtypes(include=np.number).columns.tolist(),
        'categorical': df.select_dtypes(include=['object', 'category']).columns.tolist(),
        # 'datetime' alone matches only timezone-naive columns; 'datetimetz'
        # adds the timezone-aware ones so they are not left unclassified.
        'datetime': df.select_dtypes(include=['datetime', 'datetimetz']).columns.tolist(),
        'dtypes': {col: dtype.name for col, dtype in df.dtypes.items()},
    }
|
112 |
|
113 |
+
# --- Module-Specific Handlers ---
|
114 |
+
|
115 |
def run_helios_engine(df, metadata):
|
116 |
+
"""The proactive analysis engine for the Helios Overview."""
|
117 |
insights = {}
|
|
|
118 |
missing = df.isnull().sum()
|
119 |
insights['missing_data'] = missing[missing > 0].sort_values(ascending=False)
|
|
|
120 |
insights['high_cardinality'] = {c: df[c].nunique() for c in metadata['categorical'] if df[c].nunique() > 50}
|
121 |
+
|
122 |
outliers = {}
|
123 |
for col in metadata['numeric']:
|
124 |
Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
|
|
|
126 |
count = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
|
127 |
if count > 0: outliers[col] = count
|
128 |
insights['outliers'] = outliers
|
129 |
+
|
130 |
suggestions = []
|
131 |
for col in metadata['categorical']:
|
132 |
if df[col].nunique() == 2: suggestions.append(f"{col} (Classification)")
|
|
|
135 |
insights['ml_suggestions'] = suggestions
|
136 |
return insights
|
137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
def prometheus_run_model(state, target, features, model_name):
|
139 |
+
"""Trains and evaluates a model in the Prometheus Launchpad."""
|
140 |
if not target or not features: return None, None, "Select target and features."
|
141 |
df = state['df_modified'].copy()
|
142 |
df.dropna(subset=[target] + features, inplace=True)
|
143 |
|
|
|
144 |
for col in [target] + features:
|
145 |
if df[col].dtype.name in ['category', 'object']:
|
146 |
+
df[col] = LabelEncoder().fit_transform(df[col])
|
|
|
|
|
147 |
|
148 |
X, y = df[features], df[target]
|
149 |
problem_type = "Classification" if y.nunique() <= 10 else "Regression"
|
150 |
|
151 |
+
MODELS = {"Classification": {"Random Forest": RandomForestClassifier, "Logistic Regression": LogisticRegression},
|
152 |
+
"Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression}}
|
|
|
|
|
153 |
if model_name not in MODELS[problem_type]: return None, None, "Invalid model for this problem type."
|
154 |
|
155 |
model = MODELS[problem_type][model_name](random_state=42)
|
|
|
157 |
if problem_type == "Classification":
|
158 |
scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
|
159 |
report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
|
|
|
160 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
161 |
model.fit(X_train, y_train)
|
|
|
|
|
162 |
y_prob = model.predict_proba(X_test)[:, 1]
|
163 |
fpr, tpr, _ = roc_curve(y_test, y_prob)
|
164 |
+
fig1 = go.Figure(data=go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {auc(fpr, tpr):.2f})'))
|
165 |
+
fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random')
|
|
|
166 |
fig1.update_layout(title="ROC Curve")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
else: # Regression
|
168 |
scores = cross_val_score(model, X, y, cv=5, scoring='r2')
|
169 |
report = f"**Cross-Validated Rยฒ Score:** {np.mean(scores):.3f} ยฑ {np.std(scores):.3f}"
|
|
|
170 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
171 |
model.fit(X_train, y_train)
|
172 |
preds = model.predict(X_test)
|
|
|
|
|
173 |
residuals = y_test - preds
|
174 |
+
fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted", labels={'x': 'Predicted', 'y': 'Residuals'})
|
175 |
fig1.add_hline(y=0, line_dash="dash")
|
176 |
|
177 |
+
if hasattr(model, 'feature_importances_'):
|
178 |
+
fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
|
179 |
+
fig2 = px.bar(fi, title="Feature Importance")
|
180 |
+
else:
|
181 |
+
fig2 = go.Figure().update_layout(title="Feature Importance (Not available)")
|
182 |
+
|
183 |
+
return fig1, fig2, report
|
184 |
|
|
|
185 |
def athena_respond(user_message, history, state, api_key):
    """Handles the chat interaction with the AI Co-pilot.

    This is a generator. Every update meant for the caller is yielded as a
    ``(history, figure, dataframe, state)`` tuple: first the model's plan,
    then the result of running the code it produced.

    Args:
        user_message: The user's request text.
        history: List of ``(user, bot)`` message tuples; extended and updated
            in place.
        state: Application state dict. ``"df_modified"`` is read and
            ``"chat_history"`` is written after a successful run.
        api_key: Gemini API key used to configure the client.

    Yields:
        tuple: ``(history, fig, df_result, state)``; ``fig`` and
        ``df_result`` are ``None`` unless the generated code assigned them.
    """
    history = history if history is not None else []

    # Guard clauses must *yield* their message. In a generator,
    # ``return value`` only stops iteration and the value is never delivered.
    if not genai:
        history.append((user_message, "Google AI library not installed. Cannot use Athena."))
        yield history, None, None, state
        return
    if not api_key:
        history.append((user_message, "Please enter your Gemini API key to use Athena."))
        yield history, None, None, state
        return

    df = (state or {}).get('df_modified')
    if df is None:
        history.append((user_message, "Please upload a dataset before asking Athena."))
        yield history, None, None, state
        return

    history.append((user_message, None))

    try:
        # Configure the API (inside the try so setup failures are reported
        # in the chat like any other model error).
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')

        # DataFrame.info() writes its report to a buffer and returns None, so
        # capture the text explicitly instead of interpolating the call.
        info_buffer = io.StringIO()
        df.info(verbose=False, buf=info_buffer)

        prompt = f"""
    You are 'Athena', an AI data scientist. Your goal is to help a user by writing and executing Python code on a pandas DataFrame named `df`.

    **DataFrame Info:**
    {info_buffer.getvalue()}

    **Instructions:**
    1. Analyze the user's request: '{user_message}'.
    2. Formulate a plan (thought).
    3. Write Python code to execute the plan. You can use `pandas as pd`, `numpy as np`, and `plotly.express as px`.
    4. To show a plot, assign it to a variable `fig`.
    5. To show a dataframe, assign it to a variable `df_result`.
    6. Use `print()` for text output.
    7. **NEVER** modify `df` in place.
    8. Respond **ONLY** with a single, valid JSON object with keys "thought" and "code".

    **Your JSON Response:**
    """
        response = model.generate_content(prompt)
        # The model may wrap its JSON in a Markdown code fence; strip it.
        response_json = json.loads(response.text.strip().replace("```json", "").replace("```", ""))
        thought = response_json.get("thought", "Thinking...")
        code_to_run = response_json.get("code", "print('No code generated.')")

        # NOTE(review): the leading glyphs of the status strings below are
        # mis-encoded in the source (likely emoji); kept as found - confirm
        # against the original file.
        bot_thinking = f"๐ง **Thinking:** *{thought}*"
        history[-1] = (user_message, bot_thinking)
        yield history, None, None, state

        # SECURITY: model-written code is executed in-process via safe_exec.
        # It receives a copy of the data so an in-place edit cannot corrupt
        # the session's DataFrame.
        local_vars = {'df': df.copy(), 'px': px, 'pd': pd, 'np': np}
        stdout, fig_result, df_result, error = safe_exec(code_to_run, local_vars)

        bot_response = bot_thinking + "\n\n---\n\n"
        if error: bot_response += f"๐ฅ **Error:**\n```\n{error}\n```"
        if stdout: bot_response += f"๐ **Output:**\n```\n{stdout}\n```"
        if not error and not stdout and not fig_result and not isinstance(df_result, pd.DataFrame):
            # NOTE(review): glyph was split across a line break in the source;
            # presumably a check-mark emoji - confirm.
            bot_response += "✅ Code executed, but produced no direct output."

        history[-1] = (user_message, bot_response)
        state['chat_history'] = history  # Persist chat history
        yield history, fig_result, df_result, state

    except Exception as e:
        error_msg = f"A critical error occurred with the AI model: {e}"
        history[-1] = (user_message, error_msg)
        yield history, None, None, state
|
245 |
|
246 |
+
# --- UI Builder ---
|
247 |
def build_ui():
|
248 |
+
"""Constructs the entire Gradio application interface."""
|
249 |
with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
|
250 |
state = gr.State(init_state())
|
251 |
|
|
|
253 |
# Left Sidebar - Command Center
|
254 |
with gr.Column(scale=1):
|
255 |
gr.Markdown("# ๐ฆ Odyssey")
|
|
|
256 |
with gr.Accordion("๐ Project", open=True):
|
257 |
project_name_input = gr.Textbox(label="Project Name", value="New_Project")
|
258 |
file_input = gr.File(label="Upload CSV", file_types=[".csv"])
|
259 |
+
api_key_input = gr.Textbox(label="๐ Gemini API Key", type="password", placeholder="Enter key...")
|
260 |
with gr.Row():
|
261 |
save_btn = gr.Button("Save")
|
262 |
load_btn = gr.UploadButton("Load .odyssey")
|
|
|
264 |
|
265 |
# Navigation buttons
|
266 |
overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
|
|
|
267 |
launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
|
268 |
copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
|
269 |
+
export_btn = gr.Button(f"{ICONS['export']} Export Report", visible=False)
|
|
|
|
|
|
|
|
|
270 |
|
271 |
# Right Panel - Main Workspace
|
272 |
with gr.Column(scale=4):
|
273 |
# --- Helios Overview Panel ---
|
274 |
with gr.Column(visible=True) as overview_panel:
|
275 |
gr.Markdown(f"# {ICONS['overview']} Helios Overview")
|
276 |
+
helios_report_md = gr.Markdown("Upload a CSV and provide a project name to begin your Odyssey.")
|
|
|
|
|
277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
# --- Prometheus Launchpad Panel ---
|
279 |
with gr.Column(visible=False) as launchpad_panel:
|
280 |
gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
|
|
|
|
|
281 |
with gr.Row():
|
282 |
lp_target = gr.Dropdown(label="๐ฏ Target")
|
283 |
+
# CORRECTED LINE: Use gr.Dropdown with multiselect=True
|
284 |
+
lp_features = gr.Dropdown(label="โจ Features", multiselect=True)
|
285 |
lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="๐ง Model")
|
286 |
lp_run_btn = gr.Button("๐ Launch Model Training (with CV)")
|
287 |
lp_report_md = gr.Markdown()
|
|
|
292 |
# --- Athena Co-pilot Panel ---
|
293 |
with gr.Column(visible=False) as copilot_panel:
|
294 |
gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
|
295 |
+
chatbot = gr.Chatbot(height=500, label="Chat History")
|
296 |
+
with gr.Accordion("AI Generated Results", open=True):
|
297 |
+
copilot_fig_output = gr.Plot()
|
298 |
+
copilot_df_output = gr.DataFrame(interactive=False)
|
299 |
+
chat_input = gr.Textbox(label="Your Request", placeholder="e.g., 'What's the correlation between all numeric columns?'")
|
|
|
300 |
chat_submit = gr.Button("Send", variant="primary")
|
301 |
|
302 |
# --- Event Handling ---
|
303 |
+
panels = [overview_panel, launchpad_panel, copilot_panel]
|
|
|
|
|
304 |
def switch_panel(btn_idx):
|
305 |
return [gr.update(visible=i == btn_idx) for i in range(len(panels))]
|
306 |
|
307 |
overview_btn.click(lambda: switch_panel(0), None, panels)
|
308 |
+
launchpad_btn.click(lambda: switch_panel(1), None, panels)
|
309 |
+
copilot_btn.click(lambda: switch_panel(2), None, panels)
|
|
|
310 |
|
311 |
+
def on_upload_or_load(state_data):
|
312 |
+
"""Unified function to update UI after data is loaded or a project is loaded."""
|
|
|
|
|
313 |
helios_md = "No data loaded."
|
314 |
+
if state_data and state_data.get('insights'):
|
315 |
+
insights = state_data['insights']
|
316 |
+
md = f"## ๐ญ Proactive Insights for `{state_data.get('project_name')}`\n"
|
317 |
+
md += f"Dataset has **{state_data['metadata']['shape'][0]} rows** and **{state_data['metadata']['shape'][1]} columns**.\n\n"
|
318 |
+
if suggestions := insights.get('ml_suggestions'):
|
319 |
+
md += "### ๐ฎ Potential ML Targets\n" + "\n".join(f"- `{s}`" for s in suggestions) + "\n"
|
320 |
+
if not insights.get('missing_data', pd.Series()).empty:
|
321 |
+
md += "\n### ๐ง Missing Data\nFound missing values in these columns:\n" + insights['missing_data'].to_frame('Missing Count').to_markdown() + "\n"
|
322 |
+
helios_md = md
|
323 |
|
324 |
+
all_cols = state_data.get('metadata', {}).get('columns', [])
|
325 |
+
return {
|
326 |
+
state: state_data,
|
327 |
+
helios_report_md: helios_md,
|
328 |
+
lp_target: gr.update(choices=all_cols),
|
329 |
+
lp_features: gr.update(choices=all_cols),
|
330 |
+
chatbot: state_data.get('chat_history', [])
|
331 |
+
}
|
332 |
|
333 |
+
file_input.upload(prime_data, [file_input, project_name_input], state).then(
|
334 |
+
on_upload_or_load, state, [state, helios_report_md, lp_target, lp_features, chatbot]
|
335 |
+
)
|
336 |
+
load_btn.upload(load_project, load_btn, state).then(
|
337 |
+
on_upload_or_load, state, [state, helios_report_md, lp_target, lp_features, chatbot]
|
338 |
+
)
|
339 |
save_btn.click(save_project, state, project_status)
|
340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])
|
342 |
+
|
343 |
+
chat_submit.click(
|
344 |
+
athena_respond,
|
345 |
+
[chat_input, chatbot, state, api_key_input],
|
346 |
+
[chatbot, copilot_fig_output, copilot_df_output, state]
|
347 |
+
).then(lambda: "", outputs=chat_input)
|
348 |
|
349 |
return demo
|
350 |
|