Update app.py
app.py CHANGED
@@ -3,54 +3,27 @@ import pandas as pd
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
-import matplotlib.pyplot as plt
+import matplotlib.pyplot as plt # For SHAP charts
 from scipy.stats import pearsonr, spearmanr
 from sklearn.inspection import permutation_importance
 from sklearn.preprocessing import StandardScaler, LabelEncoder
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.
-from
-from
-import joblib
+from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
+from sklearn.neural_network import MLPClassifier, MLPRegressor
+from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, confusion_matrix, classification_report
+from sklearn.impute import SimpleImputer
+import joblib # For saving and loading models
+import os # For file directory
 import shap
 from datetime import datetime
+from stqdm import stqdm
 
-#
-
-#
-
-
-    page_icon="🔮",
-    layout="wide",
-    initial_sidebar_state="expanded"
-)
-
+# Constants used (global)
+PATH_FILES = "/".join(('.', "files"))
+# Ensure upload location exists; make dir if it didn't create one.
+if not os.path.isdir("..") / PATH_FILES:
+    os.makedirs("created", 0o777, exist_ok=True)
 
-# --------------------------
-# Custom Styling
-# --------------------------
-st.markdown("""
-<style>
-.main {background-color: #f8f9fa;}
-.sidebar .sidebar-content {background-color: #2c3e50;}
-.stButton>button {background-color: #3498db; color: white;}
-.stTextInput>div>div>input {border: 1px solid #3498db;}
-.stSelectbox>div>div>select {border: 1px solid #3498db;}
-.stSlider>div>div>div>div {background-color: #3498db;}
-.metric {padding: 15px; background-color: white; border-radius: 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);}
-</style>
-""", unsafe_allow_html=True)
-
-# --------------------------
-# Session State Initialization
-# --------------------------
-if 'raw_data' not in st.session_state:
-    st.session_state.raw_data = None
-if 'cleaned_data' not in st.session_state:
-    st.session_state.cleaned_data = None
-if 'model' not in st.session_state:
-    st.session_state.model = None
 
 # --------------------------
 # Helper Functions
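Note: the directory-setup lines added in this hunk will not run as written. Because / binds tighter than not, the condition evaluates os.path.isdir("..") to a bool and then tries to divide it by PATH_FILES, which raises a TypeError at import time; and even if it ran, os.makedirs("created", ...) would create a folder literally named "created" rather than the PATH_FILES location. A minimal corrected sketch of the presumed intent, creating ./files at startup if it is missing (illustrative, not taken from the commit):

    import os

    # Presumed intent: make sure the upload directory ./files exists.
    PATH_FILES = os.path.join(".", "files")
    os.makedirs(PATH_FILES, exist_ok=True)  # no error if the folder is already there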
@@ -92,7 +65,6 @@ def generate_quality_report(df):
         report['columns'][col] = col_report
     return report
 
-# Function to train the model (Separated for clarity and reusability)
 def train_model(df, target, features, problem_type, test_size, model_type, model_params, use_grid_search=False):
     """Trains a model with hyperparameter tuning, cross-validation, and customizable model architecture."""
 
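Note: train_model exposes a use_grid_search flag, and the new imports pull in GridSearchCV, so the trainer presumably switches between a plain fit and a tuned fit. The helper below is a hypothetical sketch of that branch; the function name, parameter grid, and scoring choice are illustrative and not taken from app.py:

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GridSearchCV

    def fit_with_optional_grid_search(X_train, y_train, use_grid_search=False):
        # Plain fit by default; exhaustive search over a small grid when requested.
        base = RandomForestClassifier(random_state=42)
        if not use_grid_search:
            base.fit(X_train, y_train)
            return base
        grid = {"n_estimators": [100, 300], "max_depth": [None, 10]}
        search = GridSearchCV(base, grid, cv=5, scoring="accuracy", n_jobs=-1)
        search.fit(X_train, y_train)
        return search.best_estimator_  # refit on the full training split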
@@ -258,13 +230,12 @@ def train_model(df, target, features, problem_type, test_size, model_type, model_params, use_grid_search=False):
         # Store the column order for prediction purposes
         column_order = X.columns
 
-        return model, scaler, label_encoder, imputer_numerical, metrics, column_order, importance
+        return model, scaler, label_encoder, imputer_numerical, metrics, column_order, importance, X_train, y_train # Return X_train and y_train
 
     except Exception as e:
         st.error(f"Training failed: {str(e)}")
-        return None, None, None, None, None, None, None
-
-# Model Validation Function
+        return None, None, None, None, None, None, None, None, None
+
 def validate_model(model_path, df, target, features, test_size):
     """Loads a model, preprocesses data, and evaluates the model on a validation set."""
     try:
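Note: train_model now also returns X_train and y_train, and matplotlib is imported "For SHAP charts", so the extra values are presumably fed to SHAP by the caller. A hypothetical caller-side sketch, assuming a tree-based model (RandomForest or GradientBoosting), that training succeeded, and that df, target, features, and the model settings are already defined by the app:

    import matplotlib.pyplot as plt
    import shap
    import streamlit as st

    # Unpack the widened return value; the names mirror the return statement above.
    (model, scaler, label_encoder, imputer_numerical, metrics,
     column_order, importance, X_train, y_train) = train_model(
        df, target, features, problem_type, test_size, model_type, model_params)

    if model is not None:
        explainer = shap.TreeExplainer(model)          # tree-based models only
        shap_values = explainer.shap_values(X_train)   # a list of arrays for classifiers
        shap.summary_plot(shap_values, X_train, show=False)
        st.pyplot(plt.gcf())                           # hand the matplotlib figure to Streamlit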
@@ -365,18 +336,18 @@ with st.sidebar:
 # --------------------------
 if app_mode == "Data Upload":
     st.title("📤 Data Upload & Profiling")
-
+
     uploaded_file = st.file_uploader("Upload your dataset (CSV/XLSX)", type=["csv", "xlsx"])
-
+
     if uploaded_file:
         try:
             if uploaded_file.name.endswith('.csv'):
                 df = pd.read_csv(uploaded_file)
             else:
                 df = pd.read_excel(uploaded_file)
-
+
             st.session_state.raw_data = df
-
+
             col1, col2, col3 = st.columns(3)
             with col1:
                 st.metric("Rows", df.shape[0])
@@ -384,15 +355,15 @@ if app_mode == "Data Upload":
                 st.metric("Columns", df.shape[1])
             with col3:
                 st.metric("Missing Values", df.isna().sum().sum())
-
+
             with st.expander("Data Preview", expanded=True):
                 st.dataframe(df.head(10), use_container_width=True)
-
+
             if st.button("Generate Full Profile Report"):
                 with st.spinner("Generating comprehensive analysis..."):
                     pr = ProfileReport(df, explorative=True)
                     st_profile_report(pr)
-
+
         except Exception as e:
             st.error(f"Error loading file: {str(e)}")
 
@@ -406,6 +377,8 @@ elif app_mode == "Data Cleaning":
         st.warning("Please upload data first")
         st.stop()
 
+    df = st.session_state.raw_data.copy() # Ensure df is defined in this section
+
     # Initialize session state (only if it's not already there)
     if 'data_versions' not in st.session_state:
         st.session_state.data_versions = [st.session_state.raw_data.copy()]
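Note: the Data Cleaning page now re-derives df from st.session_state.raw_data and seeds a data_versions list with a copy of the upload. Keeping every intermediate frame in that list gives the page a natural undo history; the helpers below are a hypothetical sketch of that pattern (function names are illustrative, not from app.py):

    import streamlit as st

    def current_df():
        # The working frame is always the last entry in the history.
        return st.session_state.data_versions[-1]

    def push_version(new_df):
        # Record the result of a cleaning step.
        st.session_state.data_versions.append(new_df.copy())

    def undo_last_step():
        # Drop the latest version but always keep the original upload.
        if len(st.session_state.data_versions) > 1:
            st.session_state.data_versions.pop()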