Update app.py
app.py CHANGED

@@ -3,6 +3,15 @@ import pandas as pd
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
+from sklearn.impute import SimpleImputer
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+from sklearn.neural_network import MLPRegressor, MLPClassifier
+from sklearn.metrics import confusion_matrix, classification_report, r2_score
+from sklearn.model_selection import cross_val_score
+import scipy.stats as stats
 import matplotlib.pyplot as plt  # For SHAP charts
 from scipy.stats import pearsonr, spearmanr
 from sklearn.inspection import permutation_importance
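
The nine added imports supply the estimators, tuning, and metrics utilities used by the Model Training changes below. `sklearn.model_selection` is imported on two separate lines; the pair could be collapsed into one (a cosmetic consolidation, not part of the commit):

    from sklearn.model_selection import GridSearchCV, cross_val_score
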
@@ -356,20 +365,20 @@ if app_mode == "Data Upload":
 # --------------------------
 elif app_mode == "Data Cleaning":
     st.title("🧹 Smart Data Cleaning")
-
-
+
+    # Check for raw data FIRST
+    if 'raw_data' not in st.session_state:
         st.warning("Please upload data first")
         st.stop()
-
-    #
-    df = st.session_state.cleaned_data.copy()  # Changed line
-
-    # Initialize session state
+
+    # Initialize data_versions and cleaned_data together
     if 'data_versions' not in st.session_state:
         st.session_state.data_versions = [st.session_state.raw_data.copy()]
     if 'cleaned_data' not in st.session_state:
         st.session_state.cleaned_data = st.session_state.raw_data.copy()
-
+
+    # Now safely use cleaned_data
+    df = st.session_state.cleaned_data.copy()

     # --------------------------
     # Data Health Dashboard
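
Every cleaning action below funnels through `update_cleaned_data(new_df)`, a helper defined elsewhere in app.py that this diff never shows. A minimal sketch of what it presumably does, given the `data_versions` undo stack initialized above (the body is an assumption, not the commit's code):

    def update_cleaned_data(new_df):
        # Assumed helper: push a copy onto the undo stack, then publish it
        st.session_state.data_versions.append(new_df.copy())
        st.session_state.cleaned_data = new_df
        st.success("Changes applied!")
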
@@ -394,11 +403,12 @@ elif app_mode == "Data Cleaning":
     # --------------------------
     # Undo Functionality
     # --------------------------
-
+    # In Data Cleaning page's Undo section:
+    if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
         if st.button("⏮️ Undo Last Action"):
-            st.session_state.data_versions.pop()
-            st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
-            st.
+            st.session_state.data_versions.pop()
+            st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
+            st.rerun()

     # --------------------------
     # Missing Value Handling
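
The undo block's new code calls `st.rerun()`, Streamlit's stable rerun API, while the EDA and Model Training hunks below still use the deprecated `st.experimental_rerun()`. If the Space must also run on older Streamlit releases, a small shim keeps both paths working (hypothetical helper, not in the commit):

    def force_rerun():
        # Prefer the stable API; fall back on older Streamlit versions
        if hasattr(st, "rerun"):
            st.rerun()
        else:
            st.experimental_rerun()
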
@@ -439,7 +449,8 @@ elif app_mode == "Data Cleaning":
                     new_df[cols] = new_df[cols].bfill()

                 update_cleaned_data(new_df)
-
+                st.rerun()  # Force re-run after apply
+
             except Exception as e:
                 st.error(f"Error: {str(e)}")
         else:
@@ -480,6 +491,7 @@ elif app_mode == "Data Cleaning":
                 new_df[col_to_convert] = pd.to_datetime(new_df[col_to_convert], format=date_format, errors='coerce')

                 update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
             except Exception as e:
                 st.error(f"Error: {str(e)}")

@@ -495,6 +507,7 @@ elif app_mode == "Data Cleaning":
             new_df = df.copy()
             new_df = new_df.drop(columns=columns_to_drop)
             update_cleaned_data(new_df)
+            st.rerun()  # Force re-run after apply

     # --------------------------
     # Label Encoding
@@ -511,6 +524,7 @@ elif app_mode == "Data Cleaning":
                 new_df[col] = le.fit_transform(new_df[col].astype(str))
                 label_encoders[col] = le
             update_cleaned_data(new_df)
+            st.rerun()  # Force re-run after apply

     # --------------------------
     # StandardScaler
@@ -525,6 +539,7 @@ elif app_mode == "Data Cleaning":
                 scaler = StandardScaler()
                 new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
                 update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
             except Exception as e:
                 st.error(f"Error: {str(e)}")

@@ -558,6 +573,7 @@ elif app_mode == "Data Cleaning":
             text_cols = new_df.select_dtypes(include='object').columns
             new_df[text_cols] = new_df[text_cols].apply(lambda x: x.str.strip())
             update_cleaned_data(new_df)
+            st.rerun()  # Force re-run after apply

     # --------------------------
     # Cleaned Data Preview
@@ -565,11 +581,10 @@ elif app_mode == "Data Cleaning":
     if st.session_state.get("cleaned_data") is not None:
         enhance_section_title("Cleaned Data Preview", "✨")
         with st.expander("✨ Cleaned Data Preview", expanded=True):
-            st.dataframe(
-
-
-
-            )
+            st.dataframe(st.session_state.cleaned_data.head(), use_container_width=True)
+
+
+

     # --------------------------
     # EDA
@@ -577,11 +592,31 @@ elif app_mode == "Data Cleaning":
 elif app_mode == "EDA":
     st.title("🔍 Interactive Data Explorer")

-
-
-    st.
+    # Universal check for all dependent pages
+    if 'cleaned_data' not in st.session_state:
+        st.warning("No cleaned data found! Please either:")
+
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.button("↩️ Go to Data Cleaning"):
+                st.session_state.app_mode = "Data Cleaning"
+                st.experimental_rerun()
+
+        with col2:
+            uploaded_clean = st.file_uploader("📤 Or upload clean data",
+                                              type=["csv", "xlsx"])
+            if uploaded_clean:
+                try:
+                    st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
+                    st.success("Loaded clean data!")
+                    st.experimental_rerun()
+                except Exception as e:
+                    st.error(f"Invalid file: {str(e)}")
+
+        st.stop()  # Halt execution until resolved

-
+    # Only reaches here if cleaned_data exists
+    df = st.session_state.cleaned_data.copy()

     # --------------------------
     # Enhanced Data Overview
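
The same "universal check" block is pasted verbatim into the Model Training hunk below. Since every page past Data Upload needs this guard, it could live in one shared helper; a refactoring sketch under that assumption (note also that the uploader accepts .xlsx but the handler always calls `pd.read_csv`, so Excel files would need a `pd.read_excel` branch):

    def require_cleaned_data():
        # Hypothetical shared guard for EDA, Model Training, and Predictions
        if 'cleaned_data' not in st.session_state:
            st.warning("No cleaned data found! Please clean or upload data first.")
            uploaded_clean = st.file_uploader("📤 Or upload clean data", type=["csv", "xlsx"])
            if uploaded_clean:
                name = uploaded_clean.name.lower()
                st.session_state.cleaned_data = (
                    pd.read_excel(uploaded_clean) if name.endswith(".xlsx")
                    else pd.read_csv(uploaded_clean)
                )
                st.rerun()
            st.stop()
        return st.session_state.cleaned_data.copy()
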
@@ -910,24 +945,32 @@ elif app_mode == "EDA":
 elif app_mode == "Model Training":
     st.title("🤖 Intelligent Model Training")

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Universal check for all dependent pages
+    if 'cleaned_data' not in st.session_state:
+        st.warning("No cleaned data found! Please either:")
+
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.button("↩️ Go to Data Cleaning"):
+                st.session_state.app_mode = "Data Cleaning"
+                st.experimental_rerun()
+
+        with col2:
+            uploaded_clean = st.file_uploader("📤 Or upload clean data",
+                                              type=["csv", "xlsx"])
+            if uploaded_clean:
+                try:
+                    st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
+                    st.success("Loaded clean data!")
+                    st.experimental_rerun()
+                except Exception as e:
+                    st.error(f"Invalid file: {str(e)}")
+
+        st.stop()  # Halt execution until resolved

+    # Only reaches here if cleaned_data exists
+    df = st.session_state.cleaned_data.copy()
+
     # Model Setup
     col1, col2, col3 = st.columns(3)
     with col1:
@@ -996,16 +1039,21 @@ elif app_mode == "Model Training":

     use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")

+    # In Model Training section - Fix indentation for training logic
     if st.button("Train Model"):
         if not features:
             st.error("Please select at least one feature.")
             st.stop()
-
+
+        # INDENT ALL THIS CODE UNDER THE BUTTON CLICK
         # Call the training function
-        model, scaler, label_encoder, imputer_numerical, metrics, column_order, importance
+        model, scaler, label_encoder, imputer_numerical, metrics, column_order, importance, X_train, y_train = train_model(
+            df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search
+        )

-
-
+        if model:  # Only proceed if training was successful
+            st.success("Model trained successfully!")
+            # ... rest of model display code ...

         # Display Metrics
         st.subheader("Model Evaluation Metrics")
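
`train_model` itself is defined earlier in app.py and does not appear in this diff, but the new call site fixes its contract. A skeleton consistent with that call, with the signature taken from the diff and the body left as placeholders (an assumption, not the commit's implementation):

    from sklearn.model_selection import train_test_split

    def train_model(df, target, features, problem_type, test_size,
                    model_type, model_params, use_grid_search):
        X_train, X_test, y_train, y_test = train_test_split(
            df[features], df[target], test_size=test_size, random_state=42)
        # ... impute, scale, build `model_type` with `model_params`,
        # optionally wrap it in GridSearchCV, fit, and score ...
        model = scaler = label_encoder = imputer_numerical = None  # placeholders
        metrics, column_order, importance = {}, list(X_train.columns), None
        return (model, scaler, label_encoder, imputer_numerical,
                metrics, column_order, importance, X_train, y_train)
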
@@ -1109,10 +1157,22 @@ elif app_mode == "Model Training":
 # Predictions Section (Fixed)
 if app_mode == "Predictions":
     st.title("�� Predictive Analytics - Informed Business Decisions")
+    st.warning("Note: SHAP explanations currently work best with tree-based models like Random Forest")
+
+    # Add model upload section
+    uploaded_model = st.file_uploader("Upload trained model", type="joblib")
+    if uploaded_model:
+        try:
+            st.session_state.model = joblib.load(uploaded_model)
+            st.success("Model loaded successfully!")
+        except:
+            st.error("Invalid model file")

-    if st.session_state
-    st.warning("Please
+    if 'model' not in st.session_state:
+        st.warning("Please load a trained model first")
         st.stop()
+
+    # Rest of your predictions code...

     model_data = st.session_state.model  # Get the entire dictionary
     model = model_data['model']  # Access model
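
Two hardening notes on the upload block just added: `type="joblib"` only filters by file extension, and the bare `except:` also swallows `KeyboardInterrupt` and `SystemExit`. A slightly safer loader that also validates the dictionary shape the code below expects (a suggested variant, not the commit's code):

    uploaded_model = st.file_uploader("Upload trained model", type="joblib")
    if uploaded_model:
        try:
            model_data = joblib.load(uploaded_model)
            # The Predictions page indexes model_data['model'], so check that here
            if not isinstance(model_data, dict) or 'model' not in model_data:
                raise ValueError("expected a dict with a 'model' key")
            st.session_state.model = model_data
            st.success("Model loaded successfully!")
        except Exception as e:  # narrower than a bare except
            st.error(f"Invalid model file: {e}")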