|
|
|
|
|
|
|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
import io, os, json, base64, logging, warnings, pickle, uuid |
|
from contextlib import redirect_stdout |
|
from datetime import datetime |
|
|
|
|
|
from sklearn.model_selection import cross_val_score, train_test_split |
|
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor |
|
from sklearn.linear_model import LogisticRegression, LinearRegression |
|
from sklearn.metrics import roc_curve, auc, confusion_matrix, r2_score, mean_squared_error |
|
from sklearn.preprocessing import LabelEncoder |
|
from sklearn.impute import KNNImputer |
|
|
|
|
|
# Suppress every warning process-wide and send INFO-level logs to the root
# logger. NOTE(review): the blanket filter also hides warnings that could be
# useful while debugging - consider narrowing it.
warnings.filterwarnings('ignore')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Application-wide Gradio theme: Monochrome base with gradient backgrounds
# and translucent block fills.
THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
    body_background_fill="radial-gradient(circle, rgba(20,20,80,1) 0%, rgba(0,0,10,1) 100%);",
    block_label_background_fill="rgba(255,255,255,0.05)",
    block_background_fill="rgba(255,255,255,0.05)",
    button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
    button_secondary_background_fill="linear-gradient(90deg, #556270 0%, #4ECDC4 100%)",
    color_accent_soft="rgba(255,255,255,0.2)"
)

# Emoji shown next to each workspace section name.
# NOTE(review): the stored glyphs were mis-decoded (mojibake). "medic" and
# "copilot" were recovered from the surviving bytes; "overview", "launchpad"
# and "export" were unrecoverable and chosen by meaning - confirm the intended
# emoji.
ICONS = {
    "overview": "📊",
    "medic": "🧪",
    "launchpad": "🚀",
    "copilot": "💡",
    "export": "📄",
}
|
|
|
|
|
def init_state():
    """Build a fresh, empty application state.

    Returns:
        A new dict on every call: the scalar slots are ``None``,
        ``chat_history`` is an empty list and ``dynamic_dashboards`` is an
        empty dict.
    """
    blank = dict.fromkeys(
        ("project_name", "df_original", "df_modified", "metadata", "insights")
    )
    # Containers are created per call so separate states never share them.
    blank["chat_history"] = []
    blank["dynamic_dashboards"] = {}
    return blank
|
|
|
def save_project(state):
    """Save the entire application state to a ``.odyssey`` file.

    The state dict is shallow-copied, its two DataFrames are replaced by
    pickled bytes, and the result is pickled to ``<project_name>.odyssey``
    in the current working directory.

    Args:
        state: Application state dict as produced by ``init_state`` /
            ``prime_data``.

    Returns:
        A ``gr.update`` carrying a status message for the UI.
    """
    # NOTE: ``interactive`` is not passed with the update; the handler's
    # output is a Markdown status line, which has no such property.
    if not state or not state.get("project_name"):
        return gr.update(value="Project needs a name to save.")

    # The name comes from a free-text box: keep only the final path component
    # so it cannot point outside the working directory.
    safe_name = os.path.basename(str(state["project_name"]).strip())
    if not safe_name:
        return gr.update(value="Project needs a name to save.")
    filename = f"{safe_name}.odyssey"

    state_to_save = state.copy()
    for key in ("df_original", "df_modified"):
        frame = state_to_save.get(key)
        if frame is not None:
            # ``DataFrame.to_pickle()`` needs a destination argument, so
            # serialise to bytes directly; ``pd.read_pickle`` on a BytesIO
            # of these bytes restores the frame.
            state_to_save[key] = pickle.dumps(frame)

    try:
        with open(filename, "wb") as f:
            pickle.dump(state_to_save, f)
    except OSError as exc:
        return gr.update(value=f"Could not save project: {exc}")

    return gr.update(value=f"Project saved to {filename}")
|
|
|
def load_project(file_obj):
    """Load a ``.odyssey`` file into the application state.

    Args:
        file_obj: Either a filesystem path or an object exposing the path as
            ``.name`` (e.g. an uploaded-file wrapper). A falsy value yields
            a blank state.

    Returns:
        The state dict with ``df_original`` / ``df_modified`` restored to
        DataFrames (or ``None`` when absent).

    Raises:
        ValueError: If the file does not contain a pickled dict.
    """
    if not file_obj:
        return init_state()

    path = getattr(file_obj, "name", file_obj)

    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only open project files that come from a trusted source.
    with open(path, "rb") as f:
        loaded_state = pickle.load(f)

    if not isinstance(loaded_state, dict):
        raise ValueError("Not a valid .odyssey project file.")

    for key in ("df_original", "df_modified"):
        payload = loaded_state.get(key)
        if isinstance(payload, (bytes, bytearray)):
            # DataFrames are stored as pickled bytes inside the state dict.
            loaded_state[key] = pd.read_pickle(io.BytesIO(payload))
        else:
            # Missing key -> None; anything else is kept as stored.
            loaded_state[key] = payload

    return loaded_state
|
|
|
def prime_data(file_obj, project_name):
    """Read a CSV, coerce its text columns, analyse it and build a new state.

    Args:
        file_obj: Object whose ``.name`` is the CSV path; a falsy value
            returns a blank state.
        project_name: Name for the project; when falsy a timestamped
            ``Project_YYYYMMDD_HHMM`` name is generated.

    Returns:
        A state dict holding the parsed frame (original plus a working copy),
        its metadata and the generated insights.
    """
    if not file_obj:
        return init_state()

    frame = pd.read_csv(file_obj.name)

    text_columns = frame.select_dtypes(include=['object']).columns
    for name in text_columns:
        try:
            # First choice: the column is entirely parseable as datetimes.
            frame[name] = pd.to_datetime(frame[name], errors='raise')
        except (ValueError, TypeError):
            # Otherwise treat it as categorical when fewer than half of the
            # rows hold distinct values.
            distinct_ratio = frame[name].nunique() / len(frame)
            if 0.5 > distinct_ratio > 0.0:
                frame[name] = frame[name].astype('category')

    schema = extract_metadata(frame)
    findings = run_helios_engine(frame, schema)

    resolved_name = project_name or f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}"

    new_state = {}
    new_state["project_name"] = resolved_name
    new_state["df_original"] = frame
    new_state["df_modified"] = frame.copy()
    new_state["metadata"] = schema
    new_state["insights"] = findings
    new_state["chat_history"] = []
    new_state["dynamic_dashboards"] = {}
    return new_state
|
|
|
def extract_metadata(df):
    """Summarise a DataFrame's schema.

    Returns:
        A dict with the frame's ``shape``, the full ``columns`` list, the
        column names grouped as ``numeric`` / ``categorical`` / ``datetime``,
        and ``dtypes`` mapping each column to its dtype name.
    """
    def names_matching(selector):
        # Column names whose dtype matches the given select_dtypes selector.
        return df.select_dtypes(include=selector).columns.tolist()

    summary = {}
    summary['shape'] = df.shape
    summary['columns'] = df.columns.tolist()
    summary['numeric'] = names_matching(np.number)
    summary['categorical'] = names_matching(['object', 'category'])
    summary['datetime'] = names_matching('datetime')
    summary['dtypes'] = {column: dtype.name for column, dtype in df.dtypes.items()}
    return summary
|
|
|
|
|
def run_helios_engine(df, metadata):
    """Scan a DataFrame for data-quality findings and modelling hints.

    Args:
        df: The frame to analyse.
        metadata: Dict providing the ``'categorical'`` and ``'numeric'``
            column-name lists.

    Returns:
        A dict with four entries:
        ``missing_data`` (Series of null counts > 0, largest first),
        ``high_cardinality`` (categorical columns with more than 50 distinct
        values -> that count), ``outliers`` (numeric columns -> number of
        values outside 1.5 * IQR from the quartiles, only when > 0) and
        ``ml_suggestions`` (list of "<column> (Classification|Regression)").
    """
    def count_iqr_outliers(values):
        # Values beyond 1.5 * IQR below Q1 or above Q3.
        lower_q = values.quantile(0.25)
        upper_q = values.quantile(0.75)
        spread = upper_q - lower_q
        below = values < (lower_q - 1.5 * spread)
        above = values > (upper_q + 1.5 * spread)
        return (below | above).sum()

    null_counts = df.isnull().sum()
    missing_data = null_counts[null_counts > 0].sort_values(ascending=False)

    high_cardinality = {}
    for name in metadata['categorical']:
        if df[name].nunique() > 50:
            high_cardinality[name] = df[name].nunique()

    outlier_counts = {}
    for name in metadata['numeric']:
        flagged = count_iqr_outliers(df[name])
        if flagged > 0:
            outlier_counts[name] = flagged

    # Two-valued categoricals suggest classification targets; numeric columns
    # with more than 20 distinct values suggest regression targets.
    binary_targets = [
        f"{name} (Classification)"
        for name in metadata['categorical']
        if df[name].nunique() == 2
    ]
    continuous_targets = [
        f"{name} (Regression)"
        for name in metadata['numeric']
        if df[name].nunique() > 20
    ]

    return {
        'missing_data': missing_data,
        'high_cardinality': high_cardinality,
        'outliers': outlier_counts,
        'ml_suggestions': binary_targets + continuous_targets,
    }
|
|
|
|
|
def medic_preview_imputation(state, col, num_method, cat_method):
    """Preview how imputing one column would change its distribution.

    Works on a copy of ``state['df_modified']``; the state itself is never
    modified.

    Args:
        state: Application state with ``df_original``, ``df_modified`` and
            ``metadata`` (``'numeric'`` / ``'categorical'`` column lists).
        col: Column to impute.
        num_method: ``'mean'``, ``'KNN'``, or anything else for median.
        cat_method: ``"Create 'Missing' Category"``, or anything else for
            mode imputation.

    Returns:
        A plotly figure comparing original and imputed values (overlaid
        histograms for numeric columns, bars for categorical ones), or
        ``None`` when there is nothing to preview.
    """
    current = state.get('df_modified') if state else None
    if not col or current is None or col not in current.columns:
        return None

    df_mod = current.copy()
    metadata = state['metadata']

    if col in metadata['numeric']:
        if num_method == 'KNN':
            # Fit on every numeric column that has at least one value so the
            # imputer has neighbour features to work with. NOTE(review): with
            # only the target column every incomplete row has no defined
            # distance - believed to reduce to a column-mean fill; confirm.
            # All-null columns are excluded because the imputer drops them
            # from its output, which would shift the column positions.
            knn_features = [
                c for c in metadata['numeric']
                if c in df_mod.columns and df_mod[c].notna().any()
            ]
            if col in knn_features:
                filled = KNNImputer(n_neighbors=5).fit_transform(df_mod[knn_features])
                df_mod[col] = filled[:, knn_features.index(col)]
        else:
            value = df_mod[col].mean() if num_method == 'mean' else df_mod[col].median()
            # Assign the result back: an in-place fillna on ``df_mod[col]``
            # acts on a temporary and is not guaranteed to reach the frame.
            df_mod[col] = df_mod[col].fillna(value)

        fig = go.Figure()
        fig.add_trace(go.Histogram(x=state['df_original'][col], name='Original', opacity=0.7))
        fig.add_trace(go.Histogram(x=df_mod[col], name='Imputed', opacity=0.7))
        fig.update_layout(barmode='overlay', title_text=f"Distribution for '{col}'", legend_title_text='Dataset')
        return fig

    if col in metadata['categorical']:
        series = df_mod[col]
        if cat_method == "Create 'Missing' Category":
            if isinstance(series.dtype, pd.CategoricalDtype):
                # A categorical can only be filled with a declared category;
                # add it unless it is already present.
                if "Missing" not in series.cat.categories:
                    series = series.cat.add_categories("Missing")
            df_mod[col] = series.fillna("Missing")
        else:
            modes = series.mode()
            # An all-null column has no mode; leave it untouched.
            if not modes.empty:
                df_mod[col] = series.fillna(modes.iloc[0])

        original_counts = state['df_original'][col].value_counts()
        imputed_counts = df_mod[col].value_counts()
        fig = go.Figure()
        fig.add_trace(go.Bar(x=original_counts.index, y=original_counts.values, name='Original'))
        fig.add_trace(go.Bar(x=imputed_counts.index, y=imputed_counts.values, name='Imputed'))
        return fig

    return None
|
|
|
|
|
def _feature_importance_figure(model, features):
    """Bar chart of ``model.feature_importances_``, or a titled empty figure."""
    if hasattr(model, 'feature_importances_'):
        fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
        return px.bar(fi, title="Feature Importance")
    return go.Figure().update_layout(title="Feature Importance (Not available for this model)")


def prometheus_run_model(state, target, features, model_name):
    """Train and evaluate a model on the session's working DataFrame.

    Rows with nulls in the target or any feature are dropped, text/category
    columns are label-encoded, and the task is treated as classification
    when the target has at most 10 distinct values (regression otherwise).
    The model is scored with 5-fold cross-validation and then refit on a
    70/30 split to produce the diagnostic plot.

    Args:
        state: Application state; ``state['df_modified']`` is read.
        target: Name of the target column.
        features: Feature column names.
        model_name: ``"Random Forest"``, ``"Logistic Regression"`` or
            ``"Linear Regression"``.

    Returns:
        ``(fig1, fig2, report)``: a diagnostic figure (ROC curve for binary
        targets, confusion matrix for other classification targets,
        residual plot for regression), a feature-importance figure, and a
        Markdown report string. On invalid input the figures are ``None``
        and ``report`` explains why.
    """
    if not target or not features:
        return None, None, "Select target and features."
    if not state or state.get('df_modified') is None:
        return None, None, "Load a dataset before training a model."

    if isinstance(features, str):
        features = [features]
    # Never let the target predict itself.
    features = [name for name in features if name != target]
    if not features:
        return None, None, "Select at least one feature other than the target."

    columns = [target] + features
    df = state['df_modified'].dropna(subset=columns).copy()

    n_splits = 5
    if len(df) < n_splits:
        return None, None, "Not enough complete rows to cross-validate."

    for col in columns:
        if df[col].dtype.name in ('category', 'object'):
            df[col] = LabelEncoder().fit_transform(df[col])

    X, y = df[features], df[target]
    n_classes = y.nunique()
    problem_type = "Classification" if n_classes <= 10 else "Regression"

    # Factories instead of bare classes: LinearRegression does not accept
    # ``random_state``, so it cannot share one constructor call with the rest.
    model_factories = {
        "Classification": {
            "Random Forest": lambda: RandomForestClassifier(random_state=42),
            "Logistic Regression": lambda: LogisticRegression(random_state=42),
        },
        "Regression": {
            "Random Forest": lambda: RandomForestRegressor(random_state=42),
            "Linear Regression": LinearRegression,
        },
    }
    if model_name not in model_factories[problem_type]:
        return None, None, "Invalid model for this problem type."
    if problem_type == "Classification" and n_classes < 2:
        return None, None, "Target needs at least two distinct classes."

    model = model_factories[problem_type][model_name]()
    scoring = 'accuracy' if problem_type == "Classification" else 'r2'

    try:
        scores = cross_val_score(model, X, y, cv=n_splits, scoring=scoring)
    except ValueError as exc:
        # Raised e.g. when the data cannot be split into the requested folds.
        return None, None, f"Cross-validation failed: {exc}"

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model.fit(X_train, y_train)

    if problem_type == "Classification":
        report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"

        if len(model.classes_) == 2:
            # Column 1 of predict_proba belongs to classes_[1]; name it as the
            # positive label so targets not coded as {0, 1} also work.
            y_prob = model.predict_proba(X_test)[:, 1]
            fpr, tpr, _ = roc_curve(y_test, y_prob, pos_label=model.classes_[1])
            roc_auc = auc(fpr, tpr)
            fig1 = go.Figure(data=go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC curve (area = {roc_auc:.2f})'))
            fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random Chance')
            fig1.update_layout(title="ROC Curve")
        else:
            # A single ROC curve is undefined for more than two classes;
            # show a confusion matrix instead.
            labels = list(model.classes_)
            matrix = confusion_matrix(y_test, model.predict(X_test), labels=labels)
            ticks = [str(label) for label in labels]
            fig1 = go.Figure(data=go.Heatmap(z=matrix, x=ticks, y=ticks, colorscale='Blues'))
            fig1.update_layout(title="Confusion Matrix", xaxis_title="Predicted", yaxis_title="Actual")
            fig1.update_xaxes(type='category')
            fig1.update_yaxes(type='category', autorange='reversed')
    else:
        report = f"**Cross-Validated R² Score:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"

        preds = model.predict(X_test)
        residuals = y_test - preds
        fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted Plot", labels={'x': 'Predicted Values', 'y': 'Residuals'})
        fig1.add_hline(y=0, line_dash="dash")

    fig2 = _feature_importance_figure(model, features)
    return fig1, fig2, report
|
|
|
|
|
def athena_respond(user_message, history, state, api_key):
    """Placeholder for the Athena co-pilot chat handler.

    Not implemented yet: all four arguments are accepted but unused, and the
    function always returns ``None``.
    """
    return None
|
|
|
def render_dynamic_dashboard(state, dashboard_id):
    """Render a dashboard stored in ``state['dynamic_dashboards']``.

    Returns:
        A ``gr.Markdown`` component: a placeholder heading naming the
        dashboard when ``dashboard_id`` is registered, otherwise a
        "Dashboard not found." notice.
    """
    registered = state['dynamic_dashboards']
    if dashboard_id not in registered:
        return gr.Markdown("Dashboard not found.")

    # Real rendering is not implemented; only a placeholder is produced.
    return gr.Markdown(f"### Dashboard: {dashboard_id}\n(Dynamic rendering placeholder)")
|
|
|
|
|
def build_ui():
    """Assemble the Gradio Blocks application and wire its event handlers.

    The layout is a narrow sidebar (project controls, navigation, file info)
    next to a main area holding four mutually exclusive panels: Helios
    overview, Asclepius data lab, Prometheus launchpad and Athena co-pilot.

    Not wired in this function (no handler is attached): the export button,
    the "Apply Changes to Session" button and the chat controls.

    Returns:
        The constructed ``gr.Blocks`` app; the caller launches it.
    """
    with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
        state = gr.State(init_state())

        with gr.Row():
            # ---- Sidebar -------------------------------------------------
            with gr.Column(scale=1):
                gr.Markdown("# 🦉 Odyssey")

                with gr.Accordion("📁 Project", open=True):
                    project_name_input = gr.Textbox(label="Project Name", value="New_Project")
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                    with gr.Row():
                        save_btn = gr.Button("Save")
                        load_btn = gr.UploadButton("Load .odyssey")
                    project_status = gr.Markdown()

                overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
                medic_btn = gr.Button(f"{ICONS['medic']} Asclepius Data Lab")
                launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
                copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
                # TODO: no export handler exists yet; the button is inert.
                export_btn = gr.Button(f"{ICONS['export']} Export Report")

                with gr.Accordion("Global Info", open=False):
                    file_info_md = gr.Markdown("No file loaded.")

            # ---- Main area ----------------------------------------------
            with gr.Column(scale=4):
                with gr.Column(visible=True) as overview_panel:
                    gr.Markdown(f"# {ICONS['overview']} Helios Overview")
                    gr.Markdown("A proactive, high-level summary of your dataset.")
                    helios_report_md = gr.Markdown("Upload data to begin analysis.")

                with gr.Column(visible=False) as medic_panel:
                    gr.Markdown(f"# {ICONS['medic']} Asclepius Data Lab")
                    gr.Markdown("Interactively clean and prepare your data.")
                    medic_col_select = gr.Dropdown(label="Select Column to Clean")
                    with gr.Row():
                        medic_num_method = gr.Radio(['mean', 'median', 'KNN'], label="Numeric Imputation", value='mean')
                        medic_cat_method = gr.Radio(['mode', "Create 'Missing' Category"], label="Categorical Imputation", value='mode')
                    medic_preview_plot = gr.Plot()
                    # TODO: applying the previewed imputation is not implemented.
                    medic_apply_btn = gr.Button("Apply Changes to Session")

                with gr.Column(visible=False) as launchpad_panel:
                    gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
                    gr.Markdown("Train, evaluate, and understand predictive models.")
                    with gr.Row():
                        lp_target = gr.Dropdown(label="🎯 Target")
                        # Gradio has no ``Multiselect`` component; a Dropdown
                        # with ``multiselect=True`` returns a list of choices.
                        lp_features = gr.Dropdown(label="✨ Features", multiselect=True)
                        lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="🧠 Model")
                    lp_run_btn = gr.Button("🚀 Launch Model Training (with CV)")
                    lp_report_md = gr.Markdown()
                    with gr.Row():
                        lp_fig1 = gr.Plot()
                        lp_fig2 = gr.Plot()

                with gr.Column(visible=False) as copilot_panel:
                    gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
                    gr.Markdown("Your collaborative AI data scientist. Ask anything.")
                    # TODO: the chat controls are not connected to a handler.
                    chatbot = gr.Chatbot(height=500)
                    with gr.Accordion("AI Generated Dashboard", open=False) as dynamic_dash_accordion:
                        dynamic_dash_output = gr.Group()
                    chat_input = gr.Textbox(label="Your Request")
                    chat_submit = gr.Button("Send", variant="primary")

        # ---- Navigation: show exactly one panel --------------------------
        panels = [overview_panel, medic_panel, launchpad_panel, copilot_panel]

        def switch_panel(btn_idx):
            return [gr.update(visible=(idx == btn_idx)) for idx, _ in enumerate(panels)]

        overview_btn.click(lambda: switch_panel(0), None, panels)
        medic_btn.click(lambda: switch_panel(1), None, panels)
        launchpad_btn.click(lambda: switch_panel(2), None, panels)
        copilot_btn.click(lambda: switch_panel(3), None, panels)

        # ---- Data loading -----------------------------------------------
        data_outputs = [state, helios_report_md, file_info_md, medic_col_select, lp_target, lp_features]

        def source_label(file):
            # Uploaded files may arrive as a path or as an object with ``.name``.
            return os.path.basename(str(getattr(file, "name", file)))

        def outputs_for(new_state, label):
            """Derive every data-dependent widget update from a state dict."""
            insights = new_state.get('insights')
            metadata = new_state.get('metadata')
            if not insights or not metadata:
                cleared = gr.update(choices=[], value=None)
                return new_state, "No data loaded.", "No file loaded.", cleared, cleared, cleared

            # ICONS has no 'ml_suggestions' entry; the suggestions feed the
            # launchpad, so its icon is used for the heading.
            suggestion_lines = "\n".join(f"- `{s}`" for s in insights['ml_suggestions'])
            helios_md = f"### {ICONS['launchpad']} ML Suggestions\n" + suggestion_lines
            file_info = f"**File:** `{label}`\n\n**Shape:** `{metadata['shape']}`"

            all_cols = metadata['columns']
            missing_cols = insights['missing_data'].index.tolist()
            return (
                new_state,
                helios_md,
                file_info,
                gr.update(choices=missing_cols),
                gr.update(choices=all_cols),
                gr.update(choices=all_cols),
            )

        def on_upload(file, name):
            if not file:
                return outputs_for(init_state(), "")
            return outputs_for(prime_data(file, name), source_label(file))

        def on_load(file):
            if not file:
                return outputs_for(init_state(), "")
            return outputs_for(load_project(file), source_label(file))

        file_input.upload(on_upload, [file_input, project_name_input], data_outputs)
        load_btn.upload(on_load, load_btn, data_outputs)
        save_btn.click(save_project, state, project_status)

        # ---- Data lab: refresh the preview on any control change ---------
        preview_inputs = [state, medic_col_select, medic_num_method, medic_cat_method]
        for control in (medic_col_select, medic_num_method, medic_cat_method):
            control.change(medic_preview_imputation, preview_inputs, medic_preview_plot)

        # ---- Launchpad ---------------------------------------------------
        lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the Blocks app and start the Gradio server
    # with debug mode enabled.
    app = build_ui()
    app.launch(debug=True)