Update app.py

app.py (CHANGED)
@@ -1,12 +1,13 @@
  # Odyssey - The AI Data Science Workspace
- # A
 
  import gradio as gr
  import pandas as pd
  import numpy as np
  import plotly.express as px
  import plotly.graph_objects as go
- import io, os, json,
  from contextlib import redirect_stdout
  from datetime import datetime
 
@@ -18,13 +19,20 @@ from sklearn.metrics import roc_curve, auc, confusion_matrix, r2_score, mean_squ
  from sklearn.preprocessing import LabelEncoder
  from sklearn.impute import KNNImputer
 
  # --- Configuration ---
  warnings.filterwarnings('ignore')
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
  # --- UI Theme & Icons ---
  THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
-     body_background_fill="radial-gradient(circle, rgba(
      block_label_background_fill="rgba(255,255,255,0.05)",
      block_background_fill="rgba(255,255,255,0.05)",
      button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
@@ -33,57 +41,49 @@ THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral
  )
  ICONS = {"overview": "🔭", "medic": "🧪", "launchpad": "🚀", "copilot": "💡", "export": "📄"}
 
  # --- Core State & Project Management ---
  def init_state():
-     """Initializes a blank global state."""
      return {
-         "project_name": None,
-         "df_original": None,
-         "df_modified": None,
-         "metadata": None,
-         "insights": None,
-         "chat_history": [],
-         "dynamic_dashboards": {}
      }
 
  def save_project(state):
-     """Saves the entire application state to a .odyssey file."""
      if not state or not state.get("project_name"):
          return gr.update(value="Project needs a name to save.", interactive=True)
 
      filename = f"{state['project_name']}.odyssey"
-     # Convert dataframes to pickle strings for serialization
-     state_to_save = state.copy()
-     if state_to_save['df_original'] is not None:
-         state_to_save['df_original'] = state_to_save['df_original'].to_pickle()
-     if state_to_save['df_modified'] is not None:
-         state_to_save['df_modified'] = state_to_save['df_modified'].to_pickle()
-
      with open(filename, "wb") as f:
-         pickle.dump(state_to_save, f)
-
-     return gr.update(value=f"Project saved to {filename}", interactive=True)
 
  def load_project(file_obj):
      """Loads a .odyssey file into the application state."""
      if not file_obj: return init_state()
      with open(file_obj.name, "rb") as f:
-         loaded_state = pickle.load(f)
-
-     # Unpickle dataframes
-     if loaded_state['df_original'] is not None:
-         loaded_state['df_original'] = pd.read_pickle(io.BytesIO(loaded_state['df_original']))
-     if loaded_state['df_modified'] is not None:
-         loaded_state['df_modified'] = pd.read_pickle(io.BytesIO(loaded_state['df_modified']))
-
-     return loaded_state
 
  def prime_data(file_obj, project_name):
      """Main function to load a new CSV, analyze it, and set the initial state."""
      if not file_obj: return init_state()
      df = pd.read_csv(file_obj.name)
 
-     # Smart type conversion
      for col in df.select_dtypes(include=['object']).columns:
          try:
              df[col] = pd.to_datetime(df[col], errors='raise')
@@ -96,35 +96,29 @@ def prime_data(file_obj, project_name):
 
      return {
          "project_name": project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}",
-         "df_original": df,
-         "df_modified": df.copy(),
-         "metadata": metadata,
-         "insights": insights,
-         "chat_history": [],
-         "dynamic_dashboards": {}
      }
 
  def extract_metadata(df):
      """Utility to get schema and column types."""
      return {
-         'shape': df.shape,
-         'columns': df.columns.tolist(),
          'numeric': df.select_dtypes(include=np.number).columns.tolist(),
          'categorical': df.select_dtypes(include=['object', 'category']).columns.tolist(),
          'datetime': df.select_dtypes(include='datetime').columns.tolist(),
          'dtypes': df.dtypes.apply(lambda x: x.name).to_dict()
      }
 
- # ---
  def run_helios_engine(df, metadata):
-     """The proactive analysis engine."""
      insights = {}
-     # Missing Data
      missing = df.isnull().sum()
      insights['missing_data'] = missing[missing > 0].sort_values(ascending=False)
-     # High Cardinality
      insights['high_cardinality'] = {c: df[c].nunique() for c in metadata['categorical'] if df[c].nunique() > 50}
-
      outliers = {}
      for col in metadata['numeric']:
          Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
@@ -132,7 +126,7 @@ def run_helios_engine(df, metadata):
      count = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
      if count > 0: outliers[col] = count
      insights['outliers'] = outliers
-
      suggestions = []
      for col in metadata['categorical']:
          if df[col].nunique() == 2: suggestions.append(f"{col} (Classification)")
@@ -141,57 +135,21 @@ def run_helios_engine(df, metadata):
      insights['ml_suggestions'] = suggestions
      return insights
 
- # --- Asclepius Data Lab Handlers ---
- def medic_preview_imputation(state, col, num_method, cat_method):
-     if not col or col not in state['df_modified'].columns: return None
-     df_mod = state['df_modified'].copy()
-
-     if col in state['metadata']['numeric']:
-         if num_method == 'KNN':
-             imputer = KNNImputer(n_neighbors=5)
-             df_mod[col] = imputer.fit_transform(df_mod[[col]])
-         else:
-             value = df_mod[col].mean() if num_method == 'mean' else df_mod[col].median()
-             df_mod[col].fillna(value, inplace=True)
-
-         fig = go.Figure()
-         fig.add_trace(go.Histogram(x=state['df_original'][col], name='Original', opacity=0.7))
-         fig.add_trace(go.Histogram(x=df_mod[col], name='Imputed', opacity=0.7))
-         fig.update_layout(barmode='overlay', title_text=f"Distribution for '{col}'", legend_title_text='Dataset')
-         return fig
-
-     elif col in state['metadata']['categorical']:
-         if cat_method == "Create 'Missing' Category":
-             df_mod[col] = df_mod[col].cat.add_categories("Missing").fillna("Missing") if hasattr(df_mod[col], 'cat') else df_mod[col].fillna("Missing")
-         else: # Mode
-             df_mod[col].fillna(df_mod[col].mode()[0], inplace=True)
-
-         fig = go.Figure()
-         fig.add_trace(go.Bar(x=state['df_original'][col].value_counts().index, y=state['df_original'][col].value_counts().values, name='Original'))
-         fig.add_trace(go.Bar(x=df_mod[col].value_counts().index, y=df_mod[col].value_counts().values, name='Imputed'))
-         return fig
-     return None
-
- # --- Prometheus Launchpad Handlers ---
  def prometheus_run_model(state, target, features, model_name):
      if not target or not features: return None, None, "Select target and features."
      df = state['df_modified'].copy()
      df.dropna(subset=[target] + features, inplace=True)
 
-     le_map = {}
      for col in [target] + features:
          if df[col].dtype.name in ['category', 'object']:
-             le = LabelEncoder()
-             df[col] = le.fit_transform(df[col])
-             le_map[col] = le
 
      X, y = df[features], df[target]
      problem_type = "Classification" if y.nunique() <= 10 else "Regression"
 
-     MODELS = {
-         "Classification": {"Random Forest": RandomForestClassifier, "Logistic Regression": LogisticRegression},
-         "Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression}
-     }
      if model_name not in MODELS[problem_type]: return None, None, "Invalid model for this problem type."
 
      model = MODELS[problem_type][model_name](random_state=42)
@@ -199,63 +157,95 @@ def prometheus_run_model(state, target, features, model_name):
      if problem_type == "Classification":
          scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
          report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"
-
          X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
          model.fit(X_train, y_train)
-
-         # ROC Curve
          y_prob = model.predict_proba(X_test)[:, 1]
          fpr, tpr, _ = roc_curve(y_test, y_prob)
-
-         fig1
-         fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random Chance')
          fig1.update_layout(title="ROC Curve")
-
-         # Feature Importance
-         if hasattr(model, 'feature_importances_'):
-             fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
-             fig2 = px.bar(fi, title="Feature Importance")
-         else: fig2 = go.Figure().update_layout(title="Feature Importance (Not available for this model)")
-
-         return fig1, fig2, report
      else: # Regression
          scores = cross_val_score(model, X, y, cv=5, scoring='r2')
          report = f"**Cross-Validated R² Score:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"
-
          X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
          model.fit(X_train, y_train)
          preds = model.predict(X_test)
-
-         # Residuals Plot
          residuals = y_test - preds
-         fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted
          fig1.add_hline(y=0, line_dash="dash")
 
-
-
-
-
-
-
-
 
- # --- Athena Co-pilot Handlers ---
  def athena_respond(user_message, history, state, api_key):
-
-
 
-
-
-
-
-
-
-
-
-
 
- # --- UI Builder
  def build_ui():
      with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
          state = gr.State(init_state())
 
@@ -263,10 +253,10 @@ def build_ui():
      # Left Sidebar - Command Center
      with gr.Column(scale=1):
          gr.Markdown("# 🦉 Odyssey")
-
          with gr.Accordion("📁 Project", open=True):
              project_name_input = gr.Textbox(label="Project Name", value="New_Project")
              file_input = gr.File(label="Upload CSV", file_types=[".csv"])
          with gr.Row():
              save_btn = gr.Button("Save")
              load_btn = gr.UploadButton("Load .odyssey")
@@ -274,44 +264,24 @@ def build_ui():
 
          # Navigation buttons
          overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
-         medic_btn = gr.Button(f"{ICONS['medic']} Asclepius Data Lab")
          launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
          copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
-         export_btn = gr.Button(f"{ICONS['export']} Export Report")
-
-         # Global Info
-         with gr.Accordion("Global Info", open=False):
-             file_info_md = gr.Markdown("No file loaded.")
 
      # Right Panel - Main Workspace
      with gr.Column(scale=4):
          # --- Helios Overview Panel ---
          with gr.Column(visible=True) as overview_panel:
              gr.Markdown(f"# {ICONS['overview']} Helios Overview")
-             gr.Markdown("
-             # Interactive dashboard components would go here
-             helios_report_md = gr.Markdown("Upload data to begin analysis.")
 
-         # --- Asclepius Data Lab Panel ---
-         with gr.Column(visible=False) as medic_panel:
-             gr.Markdown(f"# {ICONS['medic']} Asclepius Data Lab")
-             gr.Markdown("Interactively clean and prepare your data.")
-             # UI components for Data Medic
-             medic_col_select = gr.Dropdown(label="Select Column to Clean")
-             with gr.Row():
-                 medic_num_method = gr.Radio(['mean', 'median', 'KNN'], label="Numeric Imputation", value='mean')
-                 medic_cat_method = gr.Radio(['mode', "Create 'Missing' Category"], label="Categorical Imputation", value='mode')
-             medic_preview_plot = gr.Plot()
-             medic_apply_btn = gr.Button("Apply Changes to Session")
-
          # --- Prometheus Launchpad Panel ---
          with gr.Column(visible=False) as launchpad_panel:
              gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
-             gr.Markdown("Train, evaluate, and understand predictive models.")
-             # UI components for Launchpad
              with gr.Row():
                  lp_target = gr.Dropdown(label="🎯 Target")
-
                  lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="🧠 Model")
              lp_run_btn = gr.Button("🚀 Launch Model Training (with CV)")
              lp_report_md = gr.Markdown()
@@ -322,54 +292,59 @@ def build_ui():
          # --- Athena Co-pilot Panel ---
          with gr.Column(visible=False) as copilot_panel:
              gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
-             gr.
-
-
-
-
-             chat_input = gr.Textbox(label="Your Request")
              chat_submit = gr.Button("Send", variant="primary")
 
      # --- Event Handling ---
-
-     # Panel Navigation
-     panels = [overview_panel, medic_panel, launchpad_panel, copilot_panel]
      def switch_panel(btn_idx):
          return [gr.update(visible=i == btn_idx) for i in range(len(panels))]
 
      overview_btn.click(lambda: switch_panel(0), None, panels)
-
-
-     copilot_btn.click(lambda: switch_panel(3), None, panels)
 
-
-
-         new_state = prime_data(file, name)
-         # Update all UI components based on the new state
          helios_md = "No data loaded."
-         if
-
-
 
-
-
-
-
-
-
 
-     file_input.upload(
-
-
      save_btn.click(save_project, state, project_status)
 
-     # Asclepius Live Preview
-     medic_col_select.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
-     medic_num_method.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
-     medic_cat_method.change(medic_preview_imputation, [state, medic_col_select, medic_num_method, medic_cat_method], medic_preview_plot)
-
-     # Prometheus Model Training
      lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])
 
      return demo
 
app.py after the change:

# Odyssey - The AI Data Science Workspace
# A state-of-the-art, AI-native analytic environment.
# This script is a complete, self-contained Gradio application.

import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import io, os, json, pickle, logging, warnings, uuid
from contextlib import redirect_stdout
from datetime import datetime

# ... lines 14-18 (unchanged imports) elided ...

from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer

# Optional: For AI features
try:
    import google.generativeai as genai
except ImportError:
    print("Warning: 'google-generativeai' not found. AI features will be disabled.")
    genai = None

# --- Configuration ---
warnings.filterwarnings('ignore')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- UI Theme & Icons ---
THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
    body_background_fill="radial-gradient(circle, rgba(10,20,50,1) 0%, rgba(0,0,10,1) 100%);",
    block_label_background_fill="rgba(255,255,255,0.05)",
    block_background_fill="rgba(255,255,255,0.05)",
    button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
    # ... lines 39-40 (unchanged theme settings) elided ...
)
ICONS = {"overview": "🔭", "medic": "🧪", "launchpad": "🚀", "copilot": "💡", "export": "📄"}

# --- Helper Functions ---
def safe_exec(code_string: str, local_vars: dict) -> tuple:
    """Safely execute a string of Python code and capture its output."""
    output_buffer = io.StringIO()
    try:
        with redirect_stdout(output_buffer):
            exec(code_string, globals(), local_vars)
        stdout = output_buffer.getvalue()
        fig = local_vars.get('fig')
        df_out = local_vars.get('df_result')
        return stdout, fig, df_out, None
    except Exception as e:
        return None, None, None, f"Execution Error: {str(e)}"

# --- Core State & Project Management ---
def init_state():
    """Initializes a blank global state dictionary."""
    return {
        "project_name": None, "df_original": None, "df_modified": None,
        "metadata": None, "insights": None, "chat_history": []
    }

def save_project(state):
    """Saves the entire application state to a .odyssey file."""
    if not state or not state.get("project_name"):
        return gr.update(value="Project needs a name to save.", interactive=True)

    filename = f"{state['project_name']}.odyssey"
    with open(filename, "wb") as f:
        pickle.dump(state, f)
    return gr.update(value=f"✅ Project saved to {filename}", interactive=True)

def load_project(file_obj):
    """Loads a .odyssey file into the application state."""
    if not file_obj: return init_state()
    with open(file_obj.name, "rb") as f:
        return pickle.load(f)
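
# Note: the whole state dict (DataFrames included) is now pickled in one shot;
# pandas objects are picklable, so the old per-frame to_pickle()/read_pickle()
# round-trip is no longer needed. The flip side is that pickle.load will run
# arbitrary code embedded in the file, so only load trusted .odyssey projects.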

def prime_data(file_obj, project_name):
    """Main function to load a new CSV, analyze it, and set the initial state."""
    if not file_obj: return init_state()
    df = pd.read_csv(file_obj.name)

    for col in df.select_dtypes(include=['object']).columns:
        try:
            df[col] = pd.to_datetime(df[col], errors='raise')
            # ... lines 90-95 (unchanged) elided ...

    return {
        "project_name": project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}",
        "df_original": df, "df_modified": df.copy(), "metadata": metadata,
        "insights": insights, "chat_history": []
    }

def extract_metadata(df):
    """Utility to get schema and column types."""
    return {
        'shape': df.shape, 'columns': df.columns.tolist(),
        'numeric': df.select_dtypes(include=np.number).columns.tolist(),
        'categorical': df.select_dtypes(include=['object', 'category']).columns.tolist(),
        'datetime': df.select_dtypes(include='datetime').columns.tolist(),
        'dtypes': df.dtypes.apply(lambda x: x.name).to_dict()
    }
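
# Example (hypothetical 100-row frame with columns ['age', 'city']):
#   extract_metadata(df) -> {'shape': (100, 2), 'columns': ['age', 'city'],
#                            'numeric': ['age'], 'categorical': ['city'],
#                            'datetime': [], 'dtypes': {'age': 'int64', 'city': 'object'}}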

# --- Module-Specific Handlers ---

def run_helios_engine(df, metadata):
    """The proactive analysis engine for the Helios Overview."""
    insights = {}
    missing = df.isnull().sum()
    insights['missing_data'] = missing[missing > 0].sort_values(ascending=False)
    insights['high_cardinality'] = {c: df[c].nunique() for c in metadata['categorical'] if df[c].nunique() > 50}

    outliers = {}
    for col in metadata['numeric']:
        Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
        # ... line 125 (unchanged) elided ...
        count = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
        if count > 0: outliers[col] = count
    insights['outliers'] = outliers
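    # The loop above applies Tukey's fences: with Q1 = 10 and Q3 = 20
    # (IQR = 10), any value outside [-5, 35] is counted as an outlier.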

    suggestions = []
    for col in metadata['categorical']:
        if df[col].nunique() == 2: suggestions.append(f"{col} (Classification)")
    # ... lines 133-134 (unchanged) elided ...
    insights['ml_suggestions'] = suggestions
    return insights

def prometheus_run_model(state, target, features, model_name):
    """Trains and evaluates a model in the Prometheus Launchpad."""
    if not target or not features: return None, None, "Select target and features."
    df = state['df_modified'].copy()
    df.dropna(subset=[target] + features, inplace=True)

    for col in [target] + features:
        if df[col].dtype.name in ['category', 'object']:
            df[col] = LabelEncoder().fit_transform(df[col])

    X, y = df[features], df[target]
    problem_type = "Classification" if y.nunique() <= 10 else "Regression"

    MODELS = {"Classification": {"Random Forest": RandomForestClassifier, "Logistic Regression": LogisticRegression},
              "Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression}}
    if model_name not in MODELS[problem_type]: return None, None, "Invalid model for this problem type."

    # LinearRegression takes no random_state argument, so only seed models that accept one.
    model_cls = MODELS[problem_type][model_name]
    model = model_cls() if model_cls is LinearRegression else model_cls(random_state=42)

    if problem_type == "Classification":
        scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
        report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        model.fit(X_train, y_train)
        y_prob = model.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, y_prob)
        fig1 = go.Figure(data=go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC (AUC = {auc(fpr, tpr):.2f})'))
        fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random')
        fig1.update_layout(title="ROC Curve")
    else:  # Regression
        scores = cross_val_score(model, X, y, cv=5, scoring='r2')
        report = f"**Cross-Validated R² Score:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        residuals = y_test - preds
        fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted", labels={'x': 'Predicted', 'y': 'Residuals'})
        fig1.add_hline(y=0, line_dash="dash")

    if hasattr(model, 'feature_importances_'):
        fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
        fig2 = px.bar(fi, title="Feature Importance")
    else:
        fig2 = go.Figure().update_layout(title="Feature Importance (Not available)")

    return fig1, fig2, report
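
# Example call (illustrative names only): for a loaded state, something like
#     fig_roc, fig_imp, report = prometheus_run_model(state, "churn", ["age", "plan"], "Random Forest")
# returns two plotly figures plus a markdown cross-validation report.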

def athena_respond(user_message, history, state, api_key):
    """Handles the chat interaction with the AI Co-pilot."""
    if not genai:
        history.append((user_message, "Google AI library not installed. Cannot use Athena."))
        # In a generator, a bare `return value` is discarded, so yield first.
        yield history, None, None, state
        return
    if not api_key:
        history.append((user_message, "Please enter your Gemini API key to use Athena."))
        yield history, None, None, state
        return

    history.append((user_message, None))

    # Configure the API
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-1.5-flash')

    # df.info() writes to a buffer and returns None, so capture its text
    # explicitly instead of interpolating the call into the f-string.
    info_buf = io.StringIO()
    state['df_modified'].info(buf=info_buf, verbose=False)

    prompt = f"""
    You are 'Athena', an AI data scientist. Your goal is to help a user by writing and executing Python code on a pandas DataFrame named `df`.

    **DataFrame Info:**
    {info_buf.getvalue()}

    **Instructions:**
    1. Analyze the user's request: '{user_message}'.
    2. Formulate a plan (thought).
    3. Write Python code to execute the plan. You can use `pandas as pd`, `numpy as np`, and `plotly.express as px`.
    4. To show a plot, assign it to a variable `fig`.
    5. To show a dataframe, assign it to a variable `df_result`.
    6. Use `print()` for text output.
    7. **NEVER** modify `df` in place.
    8. Respond **ONLY** with a single, valid JSON object with keys "thought" and "code".

    **Your JSON Response:**
    """
    try:
        response = model.generate_content(prompt)
        response_json = json.loads(response.text.strip().replace("```json", "").replace("```", ""))
        thought = response_json.get("thought", "Thinking...")
        code_to_run = response_json.get("code", "print('No code generated.')")

        bot_thinking = f"🧠 **Thinking:** *{thought}*"
        history[-1] = (user_message, bot_thinking)
        yield history, None, None, state

        local_vars = {'df': state['df_modified'], 'px': px, 'pd': pd, 'np': np}
        stdout, fig_result, df_result, error = safe_exec(code_to_run, local_vars)

        bot_response = bot_thinking + "\n\n---\n\n"
        if error: bot_response += f"💥 **Error:**\n```\n{error}\n```"
        if stdout: bot_response += f"📄 **Output:**\n```\n{stdout}\n```"
        if not error and not stdout and not fig_result and not isinstance(df_result, pd.DataFrame):
            bot_response += "✅ Code executed, but produced no direct output."

        history[-1] = (user_message, bot_response)
        state['chat_history'] = history  # Persist chat history
        yield history, fig_result, df_result, state

    except Exception as e:
        error_msg = f"A critical error occurred with the AI model: {e}"
        history[-1] = (user_message, error_msg)
        yield history, None, None, state
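
# The prompt contract above assumes the model answers with bare JSON; the
# replace("```json", ...) scrubbing only strips markdown fences, and any other
# wrapping falls through to the except branch.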

# --- UI Builder ---
def build_ui():
    """Constructs the entire Gradio application interface."""
    with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
        state = gr.State(init_state())

        # ... line 252 (unchanged) elided ...
        # Left Sidebar - Command Center
        with gr.Column(scale=1):
            gr.Markdown("# 🦉 Odyssey")
            with gr.Accordion("📁 Project", open=True):
                project_name_input = gr.Textbox(label="Project Name", value="New_Project")
                file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                api_key_input = gr.Textbox(label="🔑 Gemini API Key", type="password", placeholder="Enter key...")
            with gr.Row():
                save_btn = gr.Button("Save")
                load_btn = gr.UploadButton("Load .odyssey")
            # ... line 263 (unchanged) elided ...

            # Navigation buttons
            overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
            launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
            copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
            export_btn = gr.Button(f"{ICONS['export']} Export Report", visible=False)

        # Right Panel - Main Workspace
        with gr.Column(scale=4):
            # --- Helios Overview Panel ---
            with gr.Column(visible=True) as overview_panel:
                gr.Markdown(f"# {ICONS['overview']} Helios Overview")
                helios_report_md = gr.Markdown("Upload a CSV and provide a project name to begin your Odyssey.")

            # --- Prometheus Launchpad Panel ---
            with gr.Column(visible=False) as launchpad_panel:
                gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
                with gr.Row():
                    lp_target = gr.Dropdown(label="🎯 Target")
                    # Multi-select dropdown so several feature columns can be chosen.
                    lp_features = gr.Dropdown(label="✨ Features", multiselect=True)
                    lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="🧠 Model")
                lp_run_btn = gr.Button("🚀 Launch Model Training (with CV)")
                lp_report_md = gr.Markdown()

            # ... lines 288-291 (unchanged; presumably the lp_fig1/lp_fig2 plots used below) elided ...

            # --- Athena Co-pilot Panel ---
            with gr.Column(visible=False) as copilot_panel:
                gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
                chatbot = gr.Chatbot(height=500, label="Chat History")
                with gr.Accordion("AI Generated Results", open=True):
                    copilot_fig_output = gr.Plot()
                    copilot_df_output = gr.DataFrame(interactive=False)
                chat_input = gr.Textbox(label="Your Request", placeholder="e.g., 'What's the correlation between all numeric columns?'")
                chat_submit = gr.Button("Send", variant="primary")

        # --- Event Handling ---
        panels = [overview_panel, launchpad_panel, copilot_panel]
        def switch_panel(btn_idx):
            return [gr.update(visible=i == btn_idx) for i in range(len(panels))]

        overview_btn.click(lambda: switch_panel(0), None, panels)
        launchpad_btn.click(lambda: switch_panel(1), None, panels)
        copilot_btn.click(lambda: switch_panel(2), None, panels)
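        # switch_panel(1), for example, returns [update(visible=False),
        # update(visible=True), update(visible=False)], so each button shows
        # exactly one of the three panels.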

        def on_upload_or_load(state_data):
            """Unified function to update UI after data is loaded or a project is loaded."""
            helios_md = "No data loaded."
            if state_data and state_data.get('insights'):
                insights = state_data['insights']
                md = f"## 🔭 Proactive Insights for `{state_data.get('project_name')}`\n"
                md += f"Dataset has **{state_data['metadata']['shape'][0]} rows** and **{state_data['metadata']['shape'][1]} columns**.\n\n"
                if suggestions := insights.get('ml_suggestions'):
                    md += "### 🔮 Potential ML Targets\n" + "\n".join(f"- `{s}`" for s in suggestions) + "\n"
                if not insights.get('missing_data', pd.Series()).empty:
                    md += "\n### 🚧 Missing Data\nFound missing values in these columns:\n" + insights['missing_data'].to_frame('Missing Count').to_markdown() + "\n"
                helios_md = md

            # init_state() stores metadata as None, so guard with `or {}` before .get().
            all_cols = (state_data.get('metadata') or {}).get('columns', [])
            return {
                state: state_data,
                helios_report_md: helios_md,
                lp_target: gr.update(choices=all_cols),
                lp_features: gr.update(choices=all_cols),
                chatbot: state_data.get('chat_history', [])
            }
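        # Returning a dict keyed by components lets one handler update several
        # outputs selectively; Gradio matches the keys against the listener's
        # outputs list.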

        file_input.upload(prime_data, [file_input, project_name_input], state).then(
            on_upload_or_load, state, [state, helios_report_md, lp_target, lp_features, chatbot]
        )
        load_btn.upload(load_project, load_btn, state).then(
            on_upload_or_load, state, [state, helios_report_md, lp_target, lp_features, chatbot]
        )
        save_btn.click(save_project, state, project_status)
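        # `project_status` and the lp_fig1/lp_fig2 plots are presumably defined
        # in lines this diff leaves unchanged (and therefore does not display).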

        lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])

        chat_submit.click(
            athena_respond,
            [chat_input, chatbot, state, api_key_input],
            [chatbot, copilot_fig_output, copilot_df_output, state]
        ).then(lambda: "", outputs=chat_input)

    return demo